1 | /* |
2 | * Copyright 2011-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | /* |
17 | * N.B. You most likely do _not_ want to use RWSpinLock or any other |
18 | * kind of spinlock. Use SharedMutex instead. |
19 | * |
20 | * In short, spinlocks in preemptive multi-tasking operating systems |
21 | * have serious problems and fast mutexes like SharedMutex are almost |
22 | * certainly the better choice, because letting the OS scheduler put a |
23 | * thread to sleep is better for system responsiveness and throughput |
24 | * than wasting a timeslice repeatedly querying a lock held by a |
25 | * thread that's blocked, and you can't prevent userspace |
 * programs from blocking.
27 | * |
28 | * Spinlocks in an operating system kernel make much more sense than |
29 | * they do in userspace. |
30 | * |
31 | * ------------------------------------------------------------------- |
32 | * |
33 | * Two Read-Write spin lock implementations. |
34 | * |
35 | * Ref: http://locklessinc.com/articles/locks |
36 | * |
37 | * Both locks here are faster than pthread_rwlock and have very low |
38 | * overhead (usually 20-30ns). They don't use any system mutexes and |
 * are very compact (4/8 bytes), so they are suitable for per-instance
 * locking, particularly when contention is not expected.
41 | * |
 * For a spinlock, RWSpinLock is a reasonable choice. (See the note
 * above for why a spin lock is frequently a bad idea in general.)
44 | * RWSpinLock has minimal overhead, and comparable contention |
45 | * performance when the number of competing threads is less than or |
46 | * equal to the number of logical CPUs. Even as the number of |
47 | * threads gets larger, RWSpinLock can still be very competitive in |
48 | * READ, although it is slower on WRITE, and also inherently unfair |
49 | * to writers. |
50 | * |
 * RWTicketSpinLock shows more balanced READ/WRITE performance. If
 * your application really needs a lot more threads than logical CPUs,
 * and a higher-priority writer, prefer one of the RWTicketSpinLock
 * locks.
54 | * |
55 | * Caveats: |
56 | * |
57 | * RWTicketSpinLock locks can only be used with GCC on x86/x86-64 |
58 | * based systems. |
59 | * |
60 | * RWTicketSpinLock<32> only allows up to 2^8 - 1 concurrent |
61 | * readers and writers. |
62 | * |
63 | * RWTicketSpinLock<64> only allows up to 2^16 - 1 concurrent |
64 | * readers and writers. |
65 | * |
66 | * RWTicketSpinLock<..., true> (kFavorWriter = true, that is, strict |
67 | * writer priority) is NOT reentrant, even for lock_shared(). |
68 | * |
69 | * The lock will not grant any new shared (read) accesses while a thread |
70 | * attempting to acquire the lock in write mode is blocked. (That is, |
71 | * if the lock is held in shared mode by N threads, and a thread attempts |
72 | * to acquire it in write mode, no one else can acquire it in shared mode |
73 | * until these N threads release the lock and then the blocked thread |
74 | * acquires and releases the exclusive lock.) This also applies for |
75 | * attempts to reacquire the lock in shared mode by threads that already |
76 | * hold it in shared mode, making the lock non-reentrant. |
77 | * |
78 | * RWSpinLock handles 2^30 - 1 concurrent readers. |
79 | * |
80 | * @author Xin Liu <xliux@fb.com> |
81 | */ |
82 | |
83 | #pragma once |
84 | |
85 | /* |
86 | ======================================================================== |
Benchmark on (Intel(R) Xeon(R) CPU L5630 @ 2.13GHz) 8 cores (16 HTs)
88 | ======================================================================== |
89 | |
90 | ------------------------------------------------------------------------------ |
91 | 1. Single thread benchmark (read/write lock + unlock overhead) |
92 | Benchmark Iters Total t t/iter iter/sec |
93 | ------------------------------------------------------------------------------- |
94 | * BM_RWSpinLockRead 100000 1.786 ms 17.86 ns 53.4M |
95 | +30.5% BM_RWSpinLockWrite 100000 2.331 ms 23.31 ns 40.91M |
96 | +85.7% BM_RWTicketSpinLock32Read 100000 3.317 ms 33.17 ns 28.75M |
97 | +96.0% BM_RWTicketSpinLock32Write 100000 3.5 ms 35 ns 27.25M |
98 | +85.6% BM_RWTicketSpinLock64Read 100000 3.315 ms 33.15 ns 28.77M |
99 | +96.0% BM_RWTicketSpinLock64Write 100000 3.5 ms 35 ns 27.25M |
100 | +85.7% BM_RWTicketSpinLock32FavorWriterRead 100000 3.317 ms 33.17 ns 28.75M |
101 | +29.7% BM_RWTicketSpinLock32FavorWriterWrite 100000 2.316 ms 23.16 ns 41.18M |
102 | +85.3% BM_RWTicketSpinLock64FavorWriterRead 100000 3.309 ms 33.09 ns 28.82M |
103 | +30.2% BM_RWTicketSpinLock64FavorWriterWrite 100000 2.325 ms 23.25 ns 41.02M |
104 | + 175% BM_PThreadRWMutexRead 100000 4.917 ms 49.17 ns 19.4M |
105 | + 166% BM_PThreadRWMutexWrite 100000 4.757 ms 47.57 ns 20.05M |
106 | |
107 | ------------------------------------------------------------------------------ |
108 | 2. Contention Benchmark 90% read 10% write |
109 | Benchmark hits average min max sigma |
110 | ------------------------------------------------------------------------------ |
111 | ---------- 8 threads ------------ |
112 | RWSpinLock Write 142666 220ns 78ns 40.8us 269ns |
113 | RWSpinLock Read 1282297 222ns 80ns 37.7us 248ns |
114 | RWTicketSpinLock Write 85692 209ns 71ns 17.9us 252ns |
115 | RWTicketSpinLock Read 769571 215ns 78ns 33.4us 251ns |
116 | pthread_rwlock_t Write 84248 2.48us 99ns 269us 8.19us |
117 | pthread_rwlock_t Read 761646 933ns 101ns 374us 3.25us |
118 | |
119 | ---------- 16 threads ------------ |
120 | RWSpinLock Write 124236 237ns 78ns 261us 801ns |
121 | RWSpinLock Read 1115807 236ns 78ns 2.27ms 2.17us |
122 | RWTicketSpinLock Write 81781 231ns 71ns 31.4us 351ns |
123 | RWTicketSpinLock Read 734518 238ns 78ns 73.6us 379ns |
124 | pthread_rwlock_t Write 83363 7.12us 99ns 785us 28.1us |
125 | pthread_rwlock_t Read 754978 2.18us 101ns 1.02ms 14.3us |
126 | |
127 | ---------- 50 threads ------------ |
128 | RWSpinLock Write 131142 1.37us 82ns 7.53ms 68.2us |
129 | RWSpinLock Read 1181240 262ns 78ns 6.62ms 12.7us |
130 | RWTicketSpinLock Write 83045 397ns 73ns 7.01ms 31.5us |
131 | RWTicketSpinLock Read 744133 386ns 78ns 11ms 31.4us |
132 | pthread_rwlock_t Write 80849 112us 103ns 4.52ms 263us |
133 | pthread_rwlock_t Read 728698 24us 101ns 7.28ms 194us |
134 | |
135 | */ |
136 | |
137 | #include <folly/Portability.h> |
138 | #include <folly/portability/Asm.h> |
139 | |
140 | #if defined(__GNUC__) && (defined(__i386) || FOLLY_X64 || defined(ARCH_K8)) |
141 | #define RW_SPINLOCK_USE_X86_INTRINSIC_ |
142 | #include <x86intrin.h> |
#elif defined(_MSC_VER) && FOLLY_X64
144 | #define RW_SPINLOCK_USE_X86_INTRINSIC_ |
145 | #elif FOLLY_AARCH64 |
146 | #define RW_SPINLOCK_USE_X86_INTRINSIC_ |
147 | #else |
148 | #undef RW_SPINLOCK_USE_X86_INTRINSIC_ |
149 | #endif |
150 | |
151 | // iOS doesn't define _mm_cvtsi64_si128 and friends |
152 | #if (FOLLY_SSE >= 2) && !FOLLY_MOBILE && FOLLY_X64 |
153 | #define RW_SPINLOCK_USE_SSE_INSTRUCTIONS_ |
154 | #else |
155 | #undef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_ |
156 | #endif |
157 | |
158 | #include <algorithm> |
159 | #include <atomic> |
160 | #include <thread> |
161 | |
162 | #include <folly/Likely.h> |
163 | |
164 | namespace folly { |
165 | |
166 | /* |
 * A simple, small (4-byte), but unfair rwlock. Use it when writers can
 * tolerate waiting behind readers and you don't expect a lot of
 * write/read contention, or when you need small rwlocks because you
 * are creating a large number of them.
171 | * |
172 | * Note that the unfairness here is extreme: if the lock is |
173 | * continually accessed for read, writers will never get a chance. If |
174 | * the lock can be that highly contended this class is probably not an |
175 | * ideal choice anyway. |
176 | * |
 * It currently implements most of the Lockable, SharedLockable and
 * UpgradeLockable concepts except the TimedLockable-related
 * locking/unlocking interfaces.
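 *
 * Because the basic (non-timed) operations are all present, the lock also
 * composes with the standard guards; a minimal illustrative sketch,
 * assuming the user includes <mutex> and <shared_mutex>:
 *
 *   folly::RWSpinLock lock;
 *   { std::lock_guard<folly::RWSpinLock> g(lock); }   // exclusive section
 *   { std::shared_lock<folly::RWSpinLock> s(lock); }  // shared section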
180 | */ |
181 | class RWSpinLock { |
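  // Layout of bits_: bit 0 is the WRITER bit, bit 1 the UPGRADED bit, and
  // the remaining 30 bits count readers in units of READER (hence the
  // 2^30 - 1 concurrent-reader limit noted in the header comment).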
182 | enum : int32_t { READER = 4, UPGRADED = 2, WRITER = 1 }; |
183 | |
184 | public: |
185 | constexpr RWSpinLock() : bits_(0) {} |
186 | |
187 | RWSpinLock(RWSpinLock const&) = delete; |
188 | RWSpinLock& operator=(RWSpinLock const&) = delete; |
189 | |
190 | // Lockable Concept |
191 | void lock() { |
192 | uint_fast32_t count = 0; |
193 | while (!LIKELY(try_lock())) { |
194 | if (++count > 1000) { |
195 | std::this_thread::yield(); |
196 | } |
197 | } |
198 | } |
199 | |
200 | // Writer is responsible for clearing up both the UPGRADED and WRITER bits. |
201 | void unlock() { |
    static_assert(READER > WRITER + UPGRADED, "wrong bits!");
203 | bits_.fetch_and(~(WRITER | UPGRADED), std::memory_order_release); |
204 | } |
205 | |
206 | // SharedLockable Concept |
207 | void lock_shared() { |
208 | uint_fast32_t count = 0; |
209 | while (!LIKELY(try_lock_shared())) { |
210 | if (++count > 1000) { |
211 | std::this_thread::yield(); |
212 | } |
213 | } |
214 | } |
215 | |
216 | void unlock_shared() { |
217 | bits_.fetch_add(-READER, std::memory_order_release); |
218 | } |
219 | |
220 | // Downgrade the lock from writer status to reader status. |
221 | void unlock_and_lock_shared() { |
222 | bits_.fetch_add(READER, std::memory_order_acquire); |
223 | unlock(); |
224 | } |
225 | |
226 | // UpgradeLockable Concept |
227 | void lock_upgrade() { |
228 | uint_fast32_t count = 0; |
229 | while (!try_lock_upgrade()) { |
230 | if (++count > 1000) { |
231 | std::this_thread::yield(); |
232 | } |
233 | } |
234 | } |
235 | |
236 | void unlock_upgrade() { |
237 | bits_.fetch_add(-UPGRADED, std::memory_order_acq_rel); |
238 | } |
239 | |
240 | // unlock upgrade and try to acquire write lock |
241 | void unlock_upgrade_and_lock() { |
    uint_fast32_t count = 0;
243 | while (!try_unlock_upgrade_and_lock()) { |
244 | if (++count > 1000) { |
245 | std::this_thread::yield(); |
246 | } |
247 | } |
248 | } |
249 | |
250 | // unlock upgrade and read lock atomically |
251 | void unlock_upgrade_and_lock_shared() { |
252 | bits_.fetch_add(READER - UPGRADED, std::memory_order_acq_rel); |
253 | } |
254 | |
255 | // write unlock and upgrade lock atomically |
256 | void unlock_and_lock_upgrade() { |
257 | // need to do it in two steps here -- as the UPGRADED bit might be OR-ed at |
258 | // the same time when other threads are trying do try_lock_upgrade(). |
259 | bits_.fetch_or(UPGRADED, std::memory_order_acquire); |
260 | bits_.fetch_add(-WRITER, std::memory_order_release); |
261 | } |
262 | |
263 | // Attempt to acquire writer permission. Return false if we didn't get it. |
264 | bool try_lock() { |
265 | int32_t expect = 0; |
266 | return bits_.compare_exchange_strong( |
267 | expect, WRITER, std::memory_order_acq_rel); |
268 | } |
269 | |
270 | // Try to get reader permission on the lock. This can fail if we |
271 | // find out someone is a writer or upgrader. |
272 | // Setting the UPGRADED bit would allow a writer-to-be to indicate |
273 | // its intention to write and block any new readers while waiting |
274 | // for existing readers to finish and release their read locks. This |
275 | // helps avoid starving writers (promoted from upgraders). |
276 | bool try_lock_shared() { |
277 | // fetch_add is considerably (100%) faster than compare_exchange, |
278 | // so here we are optimizing for the common (lock success) case. |
279 | int32_t value = bits_.fetch_add(READER, std::memory_order_acquire); |
280 | if (UNLIKELY(value & (WRITER | UPGRADED))) { |
281 | bits_.fetch_add(-READER, std::memory_order_release); |
282 | return false; |
283 | } |
284 | return true; |
285 | } |
286 | |
287 | // try to unlock upgrade and write lock atomically |
288 | bool try_unlock_upgrade_and_lock() { |
289 | int32_t expect = UPGRADED; |
290 | return bits_.compare_exchange_strong( |
291 | expect, WRITER, std::memory_order_acq_rel); |
292 | } |
293 | |
294 | // try to acquire an upgradable lock. |
295 | bool try_lock_upgrade() { |
296 | int32_t value = bits_.fetch_or(UPGRADED, std::memory_order_acquire); |
297 | |
298 | // Note: when failed, we cannot flip the UPGRADED bit back, |
299 | // as in this case there is either another upgrade lock or a write lock. |
300 | // If it's a write lock, the bit will get cleared up when that lock's done |
301 | // with unlock(). |
302 | return ((value & (UPGRADED | WRITER)) == 0); |
303 | } |
304 | |
305 | // mainly for debugging purposes. |
306 | int32_t bits() const { |
307 | return bits_.load(std::memory_order_acquire); |
308 | } |
309 | |
310 | class ReadHolder; |
311 | class UpgradedHolder; |
312 | class WriteHolder; |
313 | |
314 | class ReadHolder { |
315 | public: |
316 | explicit ReadHolder(RWSpinLock* lock) : lock_(lock) { |
317 | if (lock_) { |
318 | lock_->lock_shared(); |
319 | } |
320 | } |
321 | |
322 | explicit ReadHolder(RWSpinLock& lock) : lock_(&lock) { |
323 | lock_->lock_shared(); |
324 | } |
325 | |
326 | ReadHolder(ReadHolder&& other) noexcept : lock_(other.lock_) { |
327 | other.lock_ = nullptr; |
328 | } |
329 | |
330 | // down-grade |
331 | explicit ReadHolder(UpgradedHolder&& upgraded) : lock_(upgraded.lock_) { |
332 | upgraded.lock_ = nullptr; |
333 | if (lock_) { |
334 | lock_->unlock_upgrade_and_lock_shared(); |
335 | } |
336 | } |
337 | |
338 | explicit ReadHolder(WriteHolder&& writer) : lock_(writer.lock_) { |
339 | writer.lock_ = nullptr; |
340 | if (lock_) { |
341 | lock_->unlock_and_lock_shared(); |
342 | } |
343 | } |
344 | |
345 | ReadHolder& operator=(ReadHolder&& other) { |
346 | using std::swap; |
347 | swap(lock_, other.lock_); |
348 | return *this; |
349 | } |
350 | |
351 | ReadHolder(const ReadHolder& other) = delete; |
352 | ReadHolder& operator=(const ReadHolder& other) = delete; |
353 | |
354 | ~ReadHolder() { |
355 | if (lock_) { |
356 | lock_->unlock_shared(); |
357 | } |
358 | } |
359 | |
360 | void reset(RWSpinLock* lock = nullptr) { |
361 | if (lock == lock_) { |
362 | return; |
363 | } |
364 | if (lock_) { |
365 | lock_->unlock_shared(); |
366 | } |
367 | lock_ = lock; |
368 | if (lock_) { |
369 | lock_->lock_shared(); |
370 | } |
371 | } |
372 | |
373 | void swap(ReadHolder* other) { |
374 | std::swap(lock_, other->lock_); |
375 | } |
376 | |
377 | private: |
378 | friend class UpgradedHolder; |
379 | friend class WriteHolder; |
380 | RWSpinLock* lock_; |
381 | }; |
382 | |
383 | class UpgradedHolder { |
384 | public: |
385 | explicit UpgradedHolder(RWSpinLock* lock) : lock_(lock) { |
386 | if (lock_) { |
387 | lock_->lock_upgrade(); |
388 | } |
389 | } |
390 | |
391 | explicit UpgradedHolder(RWSpinLock& lock) : lock_(&lock) { |
392 | lock_->lock_upgrade(); |
393 | } |
394 | |
395 | explicit UpgradedHolder(WriteHolder&& writer) { |
396 | lock_ = writer.lock_; |
397 | writer.lock_ = nullptr; |
398 | if (lock_) { |
399 | lock_->unlock_and_lock_upgrade(); |
400 | } |
401 | } |
402 | |
403 | UpgradedHolder(UpgradedHolder&& other) noexcept : lock_(other.lock_) { |
404 | other.lock_ = nullptr; |
405 | } |
406 | |
407 | UpgradedHolder& operator=(UpgradedHolder&& other) { |
408 | using std::swap; |
409 | swap(lock_, other.lock_); |
410 | return *this; |
411 | } |
412 | |
413 | UpgradedHolder(const UpgradedHolder& other) = delete; |
414 | UpgradedHolder& operator=(const UpgradedHolder& other) = delete; |
415 | |
416 | ~UpgradedHolder() { |
417 | if (lock_) { |
418 | lock_->unlock_upgrade(); |
419 | } |
420 | } |
421 | |
422 | void reset(RWSpinLock* lock = nullptr) { |
423 | if (lock == lock_) { |
424 | return; |
425 | } |
426 | if (lock_) { |
427 | lock_->unlock_upgrade(); |
428 | } |
429 | lock_ = lock; |
430 | if (lock_) { |
431 | lock_->lock_upgrade(); |
432 | } |
433 | } |
434 | |
435 | void swap(UpgradedHolder* other) { |
436 | using std::swap; |
437 | swap(lock_, other->lock_); |
438 | } |
439 | |
440 | private: |
441 | friend class WriteHolder; |
442 | friend class ReadHolder; |
443 | RWSpinLock* lock_; |
444 | }; |
445 | |
446 | class WriteHolder { |
447 | public: |
448 | explicit WriteHolder(RWSpinLock* lock) : lock_(lock) { |
449 | if (lock_) { |
450 | lock_->lock(); |
451 | } |
452 | } |
453 | |
454 | explicit WriteHolder(RWSpinLock& lock) : lock_(&lock) { |
455 | lock_->lock(); |
456 | } |
457 | |
458 | // promoted from an upgrade lock holder |
459 | explicit WriteHolder(UpgradedHolder&& upgraded) { |
460 | lock_ = upgraded.lock_; |
461 | upgraded.lock_ = nullptr; |
462 | if (lock_) { |
463 | lock_->unlock_upgrade_and_lock(); |
464 | } |
465 | } |
466 | |
467 | WriteHolder(WriteHolder&& other) noexcept : lock_(other.lock_) { |
468 | other.lock_ = nullptr; |
469 | } |
470 | |
471 | WriteHolder& operator=(WriteHolder&& other) { |
472 | using std::swap; |
473 | swap(lock_, other.lock_); |
474 | return *this; |
475 | } |
476 | |
477 | WriteHolder(const WriteHolder& other) = delete; |
478 | WriteHolder& operator=(const WriteHolder& other) = delete; |
479 | |
480 | ~WriteHolder() { |
481 | if (lock_) { |
482 | lock_->unlock(); |
483 | } |
484 | } |
485 | |
486 | void reset(RWSpinLock* lock = nullptr) { |
487 | if (lock == lock_) { |
488 | return; |
489 | } |
490 | if (lock_) { |
491 | lock_->unlock(); |
492 | } |
493 | lock_ = lock; |
494 | if (lock_) { |
495 | lock_->lock(); |
496 | } |
497 | } |
498 | |
499 | void swap(WriteHolder* other) { |
500 | using std::swap; |
501 | swap(lock_, other->lock_); |
502 | } |
503 | |
504 | private: |
505 | friend class ReadHolder; |
506 | friend class UpgradedHolder; |
507 | RWSpinLock* lock_; |
508 | }; |
509 | |
510 | private: |
511 | std::atomic<int32_t> bits_; |
512 | }; |
513 | |
514 | #ifdef RW_SPINLOCK_USE_X86_INTRINSIC_ |
// A more balanced Read-Write spin lock implemented using GCC intrinsics.
516 | |
517 | namespace detail { |
518 | template <size_t kBitWidth> |
519 | struct RWTicketIntTrait { |
520 | static_assert( |
521 | kBitWidth == 32 || kBitWidth == 64, |
522 | "bit width has to be either 32 or 64 " ); |
523 | }; |
524 | |
525 | template <> |
526 | struct RWTicketIntTrait<64> { |
527 | typedef uint64_t FullInt; |
528 | typedef uint32_t HalfInt; |
529 | typedef uint16_t QuarterInt; |
530 | |
531 | #ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_ |
532 | static __m128i make128(const uint16_t v[4]) { |
533 | return _mm_set_epi16( |
534 | 0, 0, 0, 0, short(v[3]), short(v[2]), short(v[1]), short(v[0])); |
535 | } |
536 | static inline __m128i fromInteger(uint64_t from) { |
537 | return _mm_cvtsi64_si128(int64_t(from)); |
538 | } |
539 | static inline uint64_t toInteger(__m128i in) { |
540 | return uint64_t(_mm_cvtsi128_si64(in)); |
541 | } |
542 | static inline uint64_t addParallel(__m128i in, __m128i kDelta) { |
543 | return toInteger(_mm_add_epi16(in, kDelta)); |
544 | } |
545 | #endif |
546 | }; |
547 | |
548 | template <> |
549 | struct RWTicketIntTrait<32> { |
550 | typedef uint32_t FullInt; |
551 | typedef uint16_t HalfInt; |
552 | typedef uint8_t QuarterInt; |
553 | |
554 | #ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_ |
555 | static __m128i make128(const uint8_t v[4]) { |
556 | // clang-format off |
557 | return _mm_set_epi8( |
558 | 0, 0, 0, 0, |
559 | 0, 0, 0, 0, |
560 | 0, 0, 0, 0, |
561 | char(v[3]), char(v[2]), char(v[1]), char(v[0])); |
562 | // clang-format on |
563 | } |
564 | static inline __m128i fromInteger(uint32_t from) { |
565 | return _mm_cvtsi32_si128(int32_t(from)); |
566 | } |
567 | static inline uint32_t toInteger(__m128i in) { |
568 | return uint32_t(_mm_cvtsi128_si32(in)); |
569 | } |
570 | static inline uint32_t addParallel(__m128i in, __m128i kDelta) { |
571 | return toInteger(_mm_add_epi8(in, kDelta)); |
572 | } |
573 | #endif |
574 | }; |
575 | } // namespace detail |
576 | |
577 | template <size_t kBitWidth, bool kFavorWriter = false> |
578 | class RWTicketSpinLockT { |
579 | typedef detail::RWTicketIntTrait<kBitWidth> IntTraitType; |
580 | typedef typename detail::RWTicketIntTrait<kBitWidth>::FullInt FullInt; |
581 | typedef typename detail::RWTicketIntTrait<kBitWidth>::HalfInt HalfInt; |
582 | typedef typename detail::RWTicketIntTrait<kBitWidth>::QuarterInt QuarterInt; |
583 | |
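  // Ticket bookkeeping: `users` is the next ticket to hand out, and every
  // acquisition claims one by incrementing it. A writer holding ticket t
  // spins until `write` == t, and every release (shared or exclusive)
  // advances `write`. A reader may enter only while `users` == `read`,
  // which holds exactly when no writer holds the lock or is queued for it.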
584 | union RWTicket { |
585 | constexpr RWTicket() : whole(0) {} |
586 | FullInt whole; |
587 | HalfInt readWrite; |
588 | __extension__ struct { |
589 | QuarterInt write; |
590 | QuarterInt read; |
591 | QuarterInt users; |
592 | }; |
593 | } ticket; |
594 | |
 private: // Some x86-specific utilities for atomic access to ticket.
  // On x86's TSO memory model, a plain aligned load already has acquire
  // semantics and a plain store has release semantics; asm_volatile_memory()
  // is only a compiler barrier that prevents compiler reordering.
596 | template <class T> |
597 | static T load_acquire(T* addr) { |
598 | T t = *addr; // acquire barrier |
599 | asm_volatile_memory(); |
600 | return t; |
601 | } |
602 | |
603 | template <class T> |
604 | static void store_release(T* addr, T v) { |
605 | asm_volatile_memory(); |
606 | *addr = v; // release barrier |
607 | } |
608 | |
609 | public: |
610 | constexpr RWTicketSpinLockT() {} |
611 | |
612 | RWTicketSpinLockT(RWTicketSpinLockT const&) = delete; |
613 | RWTicketSpinLockT& operator=(RWTicketSpinLockT const&) = delete; |
614 | |
615 | void lock() { |
616 | if (kFavorWriter) { |
617 | writeLockAggressive(); |
618 | } else { |
619 | writeLockNice(); |
620 | } |
621 | } |
622 | |
623 | /* |
624 | * Both try_lock and try_lock_shared diverge in our implementation from the |
625 | * lock algorithm described in the link above. |
626 | * |
627 | * In the read case, it is undesirable that the readers could wait |
628 | * for another reader (before increasing ticket.read in the other |
 * implementation). Our approach gives up on first-come-first-served
 * ordering, but our benchmarks showed improved performance for both
 * readers and writers in heavily contended cases, particularly when
 * the number of threads exceeds the number of logical CPUs.
634 | * |
635 | * We have writeLockAggressive() using the original implementation |
636 | * for a writer, which gives some advantage to the writer over the |
637 | * readers---for that path it is guaranteed that the writer will |
638 | * acquire the lock after all the existing readers exit. |
639 | */ |
640 | bool try_lock() { |
641 | RWTicket t; |
642 | FullInt old = t.whole = load_acquire(&ticket.whole); |
    if (t.users != t.write) { // held, or someone is queued ahead of us
644 | return false; |
645 | } |
    ++t.users; // claim the next ticket
647 | return __sync_bool_compare_and_swap(&ticket.whole, old, t.whole); |
648 | } |
649 | |
650 | /* |
651 | * Call this if you want to prioritize writer to avoid starvation. |
652 | * Unlike writeLockNice, immediately acquires the write lock when |
653 | * the existing readers (arriving before the writer) finish their |
654 | * turns. |
655 | */ |
656 | void writeLockAggressive() { |
657 | // std::this_thread::yield() is needed here to avoid a pathology if the |
658 | // number of threads attempting concurrent writes is >= the number of real |
659 | // cores allocated to this process. This is less likely than the |
660 | // corresponding situation in lock_shared(), but we still want to |
    // avoid it.
662 | uint_fast32_t count = 0; |
663 | QuarterInt val = __sync_fetch_and_add(&ticket.users, 1); |
664 | while (val != load_acquire(&ticket.write)) { |
665 | asm_volatile_pause(); |
666 | if (UNLIKELY(++count > 1000)) { |
667 | std::this_thread::yield(); |
668 | } |
669 | } |
670 | } |
671 | |
672 | // Call this when the writer should be nicer to the readers. |
673 | void writeLockNice() { |
674 | // Here it doesn't cpu-relax the writer. |
675 | // |
676 | // This is because usually we have many more readers than the |
677 | // writers, so the writer has less chance to get the lock when |
678 | // there are a lot of competing readers. The aggressive spinning |
679 | // can help to avoid starving writers. |
680 | // |
681 | // We don't worry about std::this_thread::yield() here because the caller |
682 | // has already explicitly abandoned fairness. |
683 | while (!try_lock()) { |
684 | } |
685 | } |
686 | |
687 | // Atomically unlock the write-lock from writer and acquire the read-lock. |
  void unlock_and_lock_shared() {
    // Take the next read ticket without touching `users`: this converts our
    // write slot into a read slot atomically, never releasing the lock.
    __sync_fetch_and_add(&ticket.read, 1);
  }
691 | |
692 | // Release writer permission on the lock. |
693 | void unlock() { |
694 | RWTicket t; |
695 | t.whole = load_acquire(&ticket.whole); |
696 | |
697 | #ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_ |
698 | FullInt old = t.whole; |
699 | // SSE2 can reduce the lock and unlock overhead by 10% |
    static const QuarterInt kDeltaBuf[4] = {1, 1, 0, 0}; // {write, read, users, 0}
701 | static const __m128i kDelta = IntTraitType::make128(kDeltaBuf); |
702 | __m128i m = IntTraitType::fromInteger(old); |
703 | t.whole = IntTraitType::addParallel(m, kDelta); |
704 | #else |
705 | ++t.read; |
706 | ++t.write; |
707 | #endif |
708 | store_release(&ticket.readWrite, t.readWrite); |
709 | } |
710 | |
711 | void lock_shared() { |
712 | // std::this_thread::yield() is important here because we can't grab the |
713 | // shared lock if there is a pending writeLockAggressive, so we |
    // need to let threads that already have a shared lock complete.
715 | uint_fast32_t count = 0; |
716 | while (!LIKELY(try_lock_shared())) { |
717 | asm_volatile_pause(); |
718 | if (UNLIKELY((++count & 1023) == 0)) { |
719 | std::this_thread::yield(); |
720 | } |
721 | } |
722 | } |
723 | |
724 | bool try_lock_shared() { |
725 | RWTicket t, old; |
726 | old.whole = t.whole = load_acquire(&ticket.whole); |
    // Requiring users == read for the CAS below to succeed filters out the
    // case where a writer currently holds the lock or is queued for it.
    old.users = old.read;
728 | #ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_ |
729 | // SSE2 may reduce the total lock and unlock overhead by 10% |
    static const QuarterInt kDeltaBuf[4] = {0, 1, 1, 0}; // {write, read, users, 0}
731 | static const __m128i kDelta = IntTraitType::make128(kDeltaBuf); |
732 | __m128i m = IntTraitType::fromInteger(old.whole); |
733 | t.whole = IntTraitType::addParallel(m, kDelta); |
734 | #else |
735 | ++t.read; |
736 | ++t.users; |
737 | #endif |
738 | return __sync_bool_compare_and_swap(&ticket.whole, old.whole, t.whole); |
739 | } |
740 | |
  void unlock_shared() {
    // A reader's release advances the write ticket, which is what a queued
    // writer spins on (see writeLockAggressive()).
    __sync_fetch_and_add(&ticket.write, 1);
  }
744 | |
745 | class WriteHolder; |
746 | |
747 | typedef RWTicketSpinLockT<kBitWidth, kFavorWriter> RWSpinLock; |
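  // N.B. within this class the RWSpinLock name refers to this ticket lock
  // via the member typedef above, not to folly::RWSpinLock; this lets the
  // holders below mirror the holder interfaces of folly::RWSpinLock.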
748 | class ReadHolder { |
749 | public: |
750 | ReadHolder(ReadHolder const&) = delete; |
751 | ReadHolder& operator=(ReadHolder const&) = delete; |
752 | |
753 | explicit ReadHolder(RWSpinLock* lock) : lock_(lock) { |
754 | if (lock_) { |
755 | lock_->lock_shared(); |
756 | } |
757 | } |
758 | |
759 | explicit ReadHolder(RWSpinLock& lock) : lock_(&lock) { |
760 | if (lock_) { |
761 | lock_->lock_shared(); |
762 | } |
763 | } |
764 | |
765 | // atomically unlock the write-lock from writer and acquire the read-lock |
766 | explicit ReadHolder(WriteHolder* writer) : lock_(nullptr) { |
767 | std::swap(this->lock_, writer->lock_); |
768 | if (lock_) { |
769 | lock_->unlock_and_lock_shared(); |
770 | } |
771 | } |
772 | |
773 | ~ReadHolder() { |
774 | if (lock_) { |
775 | lock_->unlock_shared(); |
776 | } |
777 | } |
778 | |
    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) {
        return;
      }
      if (lock_) {
        lock_->unlock_shared();
      }
      lock_ = lock;
      if (lock_) {
        lock_->lock_shared();
      }
    }
788 | |
789 | void swap(ReadHolder* other) { |
790 | std::swap(this->lock_, other->lock_); |
791 | } |
792 | |
793 | private: |
794 | RWSpinLock* lock_; |
795 | }; |
796 | |
797 | class WriteHolder { |
798 | public: |
799 | WriteHolder(WriteHolder const&) = delete; |
800 | WriteHolder& operator=(WriteHolder const&) = delete; |
801 | |
802 | explicit WriteHolder(RWSpinLock* lock) : lock_(lock) { |
803 | if (lock_) { |
804 | lock_->lock(); |
805 | } |
806 | } |
807 | explicit WriteHolder(RWSpinLock& lock) : lock_(&lock) { |
808 | if (lock_) { |
809 | lock_->lock(); |
810 | } |
811 | } |
812 | |
813 | ~WriteHolder() { |
814 | if (lock_) { |
815 | lock_->unlock(); |
816 | } |
817 | } |
818 | |
819 | void reset(RWSpinLock* lock = nullptr) { |
820 | if (lock == lock_) { |
821 | return; |
822 | } |
823 | if (lock_) { |
824 | lock_->unlock(); |
825 | } |
826 | lock_ = lock; |
827 | if (lock_) { |
828 | lock_->lock(); |
829 | } |
830 | } |
831 | |
832 | void swap(WriteHolder* other) { |
833 | std::swap(this->lock_, other->lock_); |
834 | } |
835 | |
836 | private: |
837 | friend class ReadHolder; |
838 | RWSpinLock* lock_; |
839 | }; |
840 | }; |
841 | |
842 | typedef RWTicketSpinLockT<32> RWTicketSpinLock32; |
843 | typedef RWTicketSpinLockT<64> RWTicketSpinLock64; |
844 | |
845 | #endif // RW_SPINLOCK_USE_X86_INTRINSIC_ |
846 | |
847 | } // namespace folly |
848 | |
849 | #ifdef RW_SPINLOCK_USE_X86_INTRINSIC_ |
850 | #undef RW_SPINLOCK_USE_X86_INTRINSIC_ |
851 | #endif |
852 | |