1 | /* |
2 | * Copyright 2018-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #pragma once |
18 | |
19 | #include <atomic> |
20 | #include <cstdint> |
21 | #include <memory> |
22 | #include <mutex> |
23 | |
24 | #include <folly/Indestructible.h> |
25 | #include <folly/experimental/ReadMostlySharedPtr.h> |
26 | #include <folly/synchronization/Rcu.h> |
27 | |
28 | namespace folly { |
29 | |
namespace detail {
// Tag type whose only purpose is to give AtomicReadMostlyMainPtr its own
// dedicated RCU domain, distinct from folly's default domain (so that
// writers can synchronize() eagerly without batching with unrelated users).
struct AtomicReadMostlyTag;
// Single global mutex serializing *all* writers across *every*
// AtomicReadMostlyMainPtr instance; readers never acquire it.
// Indestructible sidesteps static-destruction-order issues at shutdown.
extern Indestructible<std::mutex> atomicReadMostlyMu;
// The dedicated RCU domain: readers enter read-side critical sections on it,
// and writers call synchronize() on it to wait out in-flight readers.
extern Indestructible<rcu_domain<AtomicReadMostlyTag>> atomicReadMostlyDomain;
} // namespace detail
35 | |
36 | /* |
37 | * What atomic_shared_ptr is to shared_ptr, AtomicReadMostlyMainPtr is to |
38 | * ReadMostlyMainPtr; it allows racy conflicting accesses to one. This gives |
39 | * true shared_ptr-like semantics, including reclamation at the point where the |
40 | * last pointer to an object goes away. |
41 | * |
42 | * It's about the same speed (slightly slower) as ReadMostlyMainPtr. The most |
43 | * significant feature they share is avoiding reader-reader contention and |
44 | * atomic RMWs in the absence of writes. |
45 | */ |
template <typename T>
class AtomicReadMostlyMainPtr {
 public:
  /// Constructs an instance holding null; version slot 0 starts active.
  AtomicReadMostlyMainPtr() : curMainPtrIndex_(0) {}

  /// Takes shared ownership of `ptr`; version slot 0 starts active.
  explicit AtomicReadMostlyMainPtr(std::shared_ptr<T> ptr)
      : curMainPtrIndex_(0) {
    mainPtrs_[0] = ReadMostlyMainPtr<T>{std::move(ptr)};
  }

  /// Atomically replaces the held pointer, mirroring std::atomic's
  /// assignment operator (seq_cst, no return value).
  void operator=(std::shared_ptr<T> desired) {
    store(std::move(desired));
  }

  /// Always false: writers serialize on a global mutex and block in RCU
  /// synchronize(), so this type is never lock-free.
  bool is_lock_free() const {
    return false;
  }

  /// Returns a reader handle to the currently held value.
  ///
  /// Runs inside an RCU read-side critical section so that a concurrent
  /// writer cannot null out the version we are reading from until we have
  /// taken our reference. Readers do not touch the writer mutex.
  ///
  /// Note: `order` is applied to a plain atomic load of the version index,
  /// so (as with std::atomic::load) release orders are not meaningful here.
  ReadMostlySharedPtr<T> load(
      std::memory_order order = std::memory_order_seq_cst) const {
    auto token = detail::atomicReadMostlyDomain->lock_shared();
    // Synchronization point with the store in exchangeLocked().
    auto index = curMainPtrIndex_.load(order);
    auto result = mainPtrs_[index].getShared();
    detail::atomicReadMostlyDomain->unlock_shared(std::move(token));
    return result;
  }

  /// Atomically replaces the held pointer with `ptr`, dropping this
  /// object's reference to the previous value. Writers are serialized on
  /// the global mutex shared by all AtomicReadMostlyMainPtr instances.
  void store(
      std::shared_ptr<T> ptr,
      std::memory_order order = std::memory_order_seq_cst) {
    std::shared_ptr<T> old;
    {
      std::lock_guard<std::mutex> lg(*detail::atomicReadMostlyMu);
      old = exchangeLocked(std::move(ptr), order);
    }
    // If ~T() runs (triggered by the shared_ptr refcount decrement), it's here,
    // after dropping the lock. This avoids a possible (albeit esoteric)
    // deadlock if ~T() modifies the AtomicReadMostlyMainPtr that used to point
    // to it.
  }

  /// Atomically swaps in `ptr` and returns the previously held value.
  /// Ownership of the old pointer transfers to the caller, so any resulting
  /// destruction happens in the caller's frame, after the lock is released
  /// (see the note in store()).
  std::shared_ptr<T> exchange(
      std::shared_ptr<T> ptr,
      std::memory_order order = std::memory_order_seq_cst) {
    std::lock_guard<std::mutex> lg(*detail::atomicReadMostlyMu);
    return exchangeLocked(std::move(ptr), order);
  }

  /// Same as compare_exchange_strong: since writers already take a mutex,
  /// there is no cheaper weak variant and no spurious failure.
  bool compare_exchange_weak(
      std::shared_ptr<T>& expected,
      const std::shared_ptr<T>& desired,
      std::memory_order successOrder = std::memory_order_seq_cst,
      std::memory_order failureOrder = std::memory_order_seq_cst) {
    return compare_exchange_strong(
        expected, desired, successOrder, failureOrder);
  }

  /// Standard CAS semantics, comparing by raw pointer value (get()):
  /// if the held pointer equals `expected`, replaces it with `desired` and
  /// returns true; otherwise loads the current value into `expected` and
  /// returns false.
  bool compare_exchange_strong(
      std::shared_ptr<T>& expected,
      const std::shared_ptr<T>& desired,
      std::memory_order successOrder = std::memory_order_seq_cst,
      std::memory_order failureOrder = std::memory_order_seq_cst) {
    // See the note at the end of store; we need to defer any destruction we
    // might trigger until after the lock is released.
    // This is not actually needed down the success path (the reference passed
    // in as expected is another pointer to the same object, so we won't
    // decrement the refcount to 0), but "never decrement a refcount while
    // holding a lock" is an easier rule to keep in our heads, and costs us
    // nothing.
    std::shared_ptr<T> prev;
    std::shared_ptr<T> expectedDup;
    {
      std::lock_guard<std::mutex> lg(*detail::atomicReadMostlyMu);
      auto index = curMainPtrIndex_.load(failureOrder);
      ReadMostlyMainPtr<T>& oldMain = mainPtrs_[index];
      if (oldMain.get() != expected.get()) {
        // Failure path: move the caller's old `expected` into expectedDup so
        // its refcount decrement happens after the lock is dropped, then
        // report the current value back through `expected`.
        expectedDup = std::move(expected);
        expected = oldMain.getStdShared();
        return false;
      }
      prev = exchangeLocked(desired, successOrder);
    }
    return true;
  }

 private:
  // Must hold the global mutex.
  // Swaps `ptr` into the inactive slot, publishes the new slot index, waits
  // for all readers of the old slot to drain (RCU synchronize), then nulls
  // the old slot and returns its former value to the caller.
  std::shared_ptr<T> exchangeLocked(
      std::shared_ptr<T> ptr,
      std::memory_order order = std::memory_order_seq_cst) {
    // This is where the tricky bits happen; all modifications of the mainPtrs_
    // and index happen here. We maintain the invariant that, on entry to this
    // method, all read-side critical sections in progress are using the version
    // indicated by curMainPtrIndex_, and the other version is nulled out.
    // (Readers can still hold a ReadMostlySharedPtr to the thing the old
    // version used to point to; they just can't access the old version to get
    // that handle any more).
    auto index = curMainPtrIndex_.load(std::memory_order_relaxed);
    ReadMostlyMainPtr<T>& oldMain = mainPtrs_[index];
    ReadMostlyMainPtr<T>& newMain = mainPtrs_[1 - index];
    // NOTE(review): DCHECK comes from glog; this header does not include
    // <glog/logging.h> directly — presumably pulled in transitively. Confirm.
    DCHECK(newMain.get() == nullptr)
        << "Invariant should ensure that at most one version is non-null";
    newMain.reset(std::move(ptr));
    // If order is acq_rel, it should degrade to just release, since this is a
    // store rather than an RMW. (Of course, this is such a slow method that we
    // don't really care, but precision is its own reward. If TSAN one day
    // understands asymmetric barriers, this will also improve its error
    // detection here). We get our "acquire-y-ness" from the mutex.
    auto realOrder =
        (order == std::memory_order_acq_rel ? std::memory_order_release
                                            : order);
    // After this, read-side critical sections can access both versions, but
    // new ones will use newMain.
    // This is also synchronization point with loads.
    curMainPtrIndex_.store(1 - index, realOrder);
    // Wait for all read-side critical sections using oldMain to finish.
    detail::atomicReadMostlyDomain->synchronize();
    // We've reestablished the first half of the invariant (all readers are
    // using newMain), now let's establish the other one (that the other pointer
    // is null).
    auto result = oldMain.getStdShared();
    oldMain.reset();
    return result;
  }

  // The right way to think of this implementation is as an
  // std::atomic<ReadMostlyMainPtr<T>*>, protected by RCU. There's only two
  // tricky parts:
  // 1. We give ourselves our own RCU domain, and synchronize on modification,
  //    so that we don't do any batching of deallocations. This gives
  //    shared_ptr-like eager reclamation semantics.
  // 2. Instead of putting the ReadMostlyMainPtrs on the heap, we keep them as
  //    part of the same object to improve locality.

  // Really, just a 0/1 index. This is also the synchronization point for memory
  // orders.
  std::atomic<uint8_t> curMainPtrIndex_;

  // Both the ReadMostlyMainPtrs themselves and the domain have nontrivial
  // indirections even on the read path, and asymmetric barriers on the write
  // path. Some of these could be fused as a later optimization, at the cost of
  // having to put more tricky threading primitives in this class that are
  // currently abstracted out by those.
  ReadMostlyMainPtr<T> mainPtrs_[2];
};
192 | |
193 | } // namespace folly |
194 | |