/*
 * Copyright 2018-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <atomic>
#include <cstdint>
#include <memory>
#include <mutex>

#include <glog/logging.h>

#include <folly/Indestructible.h>
#include <folly/experimental/ReadMostlySharedPtr.h>
#include <folly/synchronization/Rcu.h>

namespace folly {

namespace detail {
struct AtomicReadMostlyTag;
extern Indestructible<std::mutex> atomicReadMostlyMu;
extern Indestructible<rcu_domain<AtomicReadMostlyTag>> atomicReadMostlyDomain;
} // namespace detail

/*
 * What atomic_shared_ptr is to shared_ptr, AtomicReadMostlyMainPtr is to
 * ReadMostlyMainPtr: it allows concurrent, conflicting accesses to one. This
 * gives true shared_ptr-like semantics, including reclamation at the point
 * where the last pointer to an object goes away.
 *
 * Reads are about the same speed as (slightly slower than) reads through a
 * plain ReadMostlyMainPtr; in particular, readers avoid reader-reader
 * contention and atomic RMWs in the absence of writes. Writes are much more
 * expensive: they serialize on a global mutex and wait out an RCU grace
 * period.
 */
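/*
 * A minimal usage sketch (Config, config, useConfig, and newSettings are
 * hypothetical names used only for illustration):
 *
 *   AtomicReadMostlyMainPtr<Config> config{std::make_shared<Config>()};
 *
 *   // Reader threads: cheap, no reader-reader contention.
 *   ReadMostlySharedPtr<Config> snapshot = config.load();
 *   useConfig(*snapshot);
 *
 *   // Writer threads: slow (global mutex plus RCU synchronization), but safe
 *   // to run concurrently with readers and other writers.
 *   config.store(std::make_shared<Config>(newSettings));
 */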
template <typename T>
class AtomicReadMostlyMainPtr {
 public:
  AtomicReadMostlyMainPtr() : curMainPtrIndex_(0) {}

  explicit AtomicReadMostlyMainPtr(std::shared_ptr<T> ptr)
      : curMainPtrIndex_(0) {
    mainPtrs_[0] = ReadMostlyMainPtr<T>{std::move(ptr)};
  }

  void operator=(std::shared_ptr<T> desired) {
    store(std::move(desired));
  }

  bool is_lock_free() const {
    return false;
  }

  ReadMostlySharedPtr<T> load(
      std::memory_order order = std::memory_order_seq_cst) const {
    auto token = detail::atomicReadMostlyDomain->lock_shared();
    // Synchronization point with the store in exchangeLocked().
    auto index = curMainPtrIndex_.load(order);
    auto result = mainPtrs_[index].getShared();
    detail::atomicReadMostlyDomain->unlock_shared(std::move(token));
    return result;
  }

  void store(
      std::shared_ptr<T> ptr,
      std::memory_order order = std::memory_order_seq_cst) {
    std::shared_ptr<T> old;
    {
      std::lock_guard<std::mutex> lg(*detail::atomicReadMostlyMu);
      old = exchangeLocked(std::move(ptr), order);
    }
    // If ~T() runs (because dropping `old` decrements the refcount to zero),
    // it runs here, after the lock has been released. This avoids a possible
    // (albeit esoteric) deadlock: ~T() might itself modify the
    // AtomicReadMostlyMainPtr that used to point to it.
  }
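  // For exposition only, the kind of reentrancy the note above guards against
  // (Widget and globalWidget are hypothetical names, not part of this API):
  //
  //   struct Widget {
  //     ~Widget() {
  //       // Reenters the same AtomicReadMostlyMainPtr. If this destructor ran
  //       // while atomicReadMostlyMu was still held by the enclosing store(),
  //       // the nested store() would self-deadlock reacquiring that mutex.
  //       globalWidget.store(nullptr);
  //     }
  //   };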

  std::shared_ptr<T> exchange(
      std::shared_ptr<T> ptr,
      std::memory_order order = std::memory_order_seq_cst) {
    std::lock_guard<std::mutex> lg(*detail::atomicReadMostlyMu);
    return exchangeLocked(std::move(ptr), order);
  }

  bool compare_exchange_weak(
      std::shared_ptr<T>& expected,
      const std::shared_ptr<T>& desired,
      std::memory_order successOrder = std::memory_order_seq_cst,
      std::memory_order failureOrder = std::memory_order_seq_cst) {
    return compare_exchange_strong(
        expected, desired, successOrder, failureOrder);
  }

  bool compare_exchange_strong(
      std::shared_ptr<T>& expected,
      const std::shared_ptr<T>& desired,
      std::memory_order successOrder = std::memory_order_seq_cst,
      std::memory_order failureOrder = std::memory_order_seq_cst) {
    // See the note at the end of store(); we need to defer any destruction we
    // might trigger until after the lock is released. This is not actually
    // needed down the success path (the reference passed in as expected is
    // another pointer to the same object, so we won't decrement the refcount
    // to 0), but "never decrement a refcount while holding a lock" is an
    // easier rule to keep in our heads, and costs us nothing.
    std::shared_ptr<T> prev;
    std::shared_ptr<T> expectedDup;
    {
      std::lock_guard<std::mutex> lg(*detail::atomicReadMostlyMu);
      auto index = curMainPtrIndex_.load(failureOrder);
      ReadMostlyMainPtr<T>& oldMain = mainPtrs_[index];
      if (oldMain.get() != expected.get()) {
        expectedDup = std::move(expected);
        expected = oldMain.getStdShared();
        return false;
      }
      prev = exchangeLocked(desired, successOrder);
    }
    return true;
  }
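  // A sketch of the usual compare-and-swap retry loop over this class
  // (`counter` and the int-constructible `Counter` type with a `value` member
  // are hypothetical, used only for this example):
  //
  //   std::shared_ptr<Counter> expected;  // initial guess; updated on failure
  //   std::shared_ptr<Counter> desired;
  //   do {
  //     desired =
  //         std::make_shared<Counter>(expected ? expected->value + 1 : 0);
  //   } while (!counter.compare_exchange_strong(expected, desired));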

 private:
  // Must hold the global mutex.
  std::shared_ptr<T> exchangeLocked(
      std::shared_ptr<T> ptr,
      std::memory_order order = std::memory_order_seq_cst) {
    // This is where the tricky bits happen; all modifications of mainPtrs_
    // and the index happen here. We maintain the invariant that, on entry to
    // this method, all read-side critical sections in progress are using the
    // version indicated by curMainPtrIndex_, and the other version is nulled
    // out. (Readers can still hold a ReadMostlySharedPtr to the thing the old
    // version used to point to; they just can't access the old version to get
    // that handle any more.)
    auto index = curMainPtrIndex_.load(std::memory_order_relaxed);
    ReadMostlyMainPtr<T>& oldMain = mainPtrs_[index];
    ReadMostlyMainPtr<T>& newMain = mainPtrs_[1 - index];
    DCHECK(newMain.get() == nullptr)
        << "Invariant should ensure that at most one version is non-null";
    newMain.reset(std::move(ptr));
    // If order is acq_rel, degrade it to plain release: this is a store, not
    // an RMW, and we get our "acquire-y-ness" from the mutex. (This is such a
    // slow method that we don't really care about the cost, but precision is
    // its own reward; if TSAN one day understands asymmetric barriers, it
    // will also improve error detection here.)
    auto realOrder =
        (order == std::memory_order_acq_rel ? std::memory_order_release
                                            : order);
    // After this, read-side critical sections can access both versions, but
    // new ones will use newMain.
    // This is also the synchronization point with loads.
    curMainPtrIndex_.store(1 - index, realOrder);
    // Wait for all read-side critical sections using oldMain to finish.
    detail::atomicReadMostlyDomain->synchronize();
    // We've reestablished the first half of the invariant (all readers are
    // using newMain); now reestablish the second half (the other pointer is
    // null).
    auto result = oldMain.getStdShared();
    oldMain.reset();
    return result;
  }
171
172 // The right way to think of this implementation is as an
173 // std::atomic<ReadMostlyMainPtr<T>*>, protected by RCU. There's only two
174 // tricky parts:
175 // 1. We give ourselves our own RCU domain, and synchronize on modification,
176 // so that we don't do any batching of deallocations. This gives
177 // shared_ptr-like eager reclamation semantics.
178 // 2. Instead of putting the ReadMostlyMainPtrs on the heap, we keep them as
179 // part of the same object to improve locality.
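  // A rough sketch of that conceptual model (not the real implementation;
  // `heapMain` is a hypothetical member used only in this comment):
  //
  //   std::atomic<ReadMostlyMainPtr<T>*> heapMain;
  //   // Readers: RCU read-side critical section around heapMain.load() and
  //   // getShared().
  //   // Writers: publish a new heap-allocated ReadMostlyMainPtr, call
  //   // synchronize() on the dedicated domain, then destroy the old one
  //   // eagerly.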

  // Really, just a 0/1 index. This is also the synchronization point for
  // memory orders.
  std::atomic<uint8_t> curMainPtrIndex_;

  // Both the ReadMostlyMainPtrs themselves and the domain have nontrivial
  // indirections even on the read path, and asymmetric barriers on the write
  // path. Some of these could be fused as a later optimization, at the cost
  // of having to put more tricky threading primitives in this class that are
  // currently abstracted out by those.
  ReadMostlyMainPtr<T> mainPtrs_[2];
};

} // namespace folly