1/*
2 * Copyright 2014-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18
19#include <assert.h>
20#include <errno.h>
21#include <stdint.h>
22#include <atomic>
23#include <thread>
24
25#include <folly/Likely.h>
26#include <folly/detail/Futex.h>
27#include <folly/detail/MemoryIdler.h>
28#include <folly/portability/Asm.h>
29#include <folly/synchronization/WaitOptions.h>
30#include <folly/synchronization/detail/Spin.h>
31
32namespace folly {
33
34/// A Baton allows a thread to block once and be awoken. Captures a
35/// single handoff, and during its lifecycle (from construction/reset
36/// to destruction/reset) a baton must either be post()ed and wait()ed
37/// exactly once each, or not at all.
38///
39/// Baton includes no internal padding, and is only 4 bytes in size.
40/// Any alignment or padding to avoid false sharing is up to the user.
41///
42/// This is basically a stripped-down semaphore that supports only a
43/// single call to sem_post and a single call to sem_wait.
44///
45/// The non-blocking version (MayBlock == false) provides more speed
46/// by using only load acquire and store release operations in the
47/// critical path, at the cost of disallowing blocking.
48///
49/// The current posix semaphore sem_t isn't too bad, but this provides
50/// more a bit more speed, inlining, smaller size, a guarantee that
51/// the implementation won't change, and compatibility with
52/// DeterministicSchedule. By having a much more restrictive
53/// lifecycle we can also add a bunch of assertions that can help to
54/// catch race conditions ahead of time.
55template <bool MayBlock = true, template <typename> class Atom = std::atomic>
56class Baton {
57 public:
58 FOLLY_ALWAYS_INLINE static constexpr WaitOptions wait_options() {
59 return {};
60 }
61
62 constexpr Baton() noexcept : state_(INIT) {}
63
64 Baton(Baton const&) = delete;
65 Baton& operator=(Baton const&) = delete;
66
67 /// It is an error to destroy a Baton on which a thread is currently
68 /// wait()ing. In practice this means that the waiter usually takes
69 /// responsibility for destroying the Baton.
70 ~Baton() noexcept {
71 // The docblock for this function says that it can't be called when
72 // there is a concurrent waiter. We assume a strong version of this
73 // requirement in which the caller must _know_ that this is true, they
74 // are not allowed to be merely lucky. If two threads are involved,
75 // the destroying thread must actually have synchronized with the
76 // waiting thread after wait() returned. To convey causality the the
77 // waiting thread must have used release semantics and the destroying
78 // thread must have used acquire semantics for that communication,
79 // so we are guaranteed to see the post-wait() value of state_,
80 // which cannot be WAITING.
81 //
82 // Note that since we only care about a single memory location,
83 // the only two plausible memory orders here are relaxed and seq_cst.
84 assert(state_.load(std::memory_order_relaxed) != WAITING);
85 }
86
87 FOLLY_ALWAYS_INLINE bool ready() const noexcept {
88 auto s = state_.load(std::memory_order_acquire);
89 assert(s == INIT || s == EARLY_DELIVERY);
90 return LIKELY(s == EARLY_DELIVERY);
91 }
92
93 /// Equivalent to destroying the Baton and creating a new one. It is
94 /// a bug to call this while there is a waiting thread, so in practice
95 /// the waiter will be the one that resets the baton.
96 void reset() noexcept {
97 // See ~Baton for a discussion about why relaxed is okay here
98 assert(state_.load(std::memory_order_relaxed) != WAITING);
99
100 // We use a similar argument to justify the use of a relaxed store
101 // here. Since both wait() and post() are required to be called
102 // only once per lifetime, no thread can actually call those methods
103 // correctly after a reset() unless it synchronizes with the thread
104 // that performed the reset(). If a post() or wait() on another thread
105 // didn't synchronize, then regardless of what operation we performed
106 // here there would be a race on proper use of the Baton's spec
107 // (although not on any particular load and store). Put another way,
108 // we don't need to synchronize here because anybody that might rely
109 // on such synchronization is required by the baton rules to perform
110 // an additional synchronization that has the desired effect anyway.
111 //
112 // There is actually a similar argument to be made about the
113 // constructor, in which the fenceless constructor initialization
114 // of state_ is piggybacked on whatever synchronization mechanism
115 // distributes knowledge of the Baton's existence
116 state_.store(INIT, std::memory_order_relaxed);
117 }
118
119 /// Causes wait() to wake up. For each lifetime of a Baton (where a
120 /// lifetime starts at construction or reset() and ends at
121 /// destruction or reset()) there can be at most one call to post(),
122 /// in the single poster version. Any thread may call post().
123 void post() noexcept {
124 if (!MayBlock) {
125 /// Spin-only version
126 ///
127 assert(
128 ((1 << state_.load(std::memory_order_relaxed)) &
129 ((1 << INIT) | (1 << EARLY_DELIVERY))) != 0);
130 state_.store(EARLY_DELIVERY, std::memory_order_release);
131 return;
132 }
133
134 /// May-block versions
135 ///
136 uint32_t before = state_.load(std::memory_order_acquire);
137
138 assert(before == INIT || before == WAITING || before == TIMED_OUT);
139
140 if (before == INIT &&
141 state_.compare_exchange_strong(
142 before,
143 EARLY_DELIVERY,
144 std::memory_order_release,
145 std::memory_order_relaxed)) {
146 return;
147 }
148
149 assert(before == WAITING || before == TIMED_OUT);
150
151 if (before == TIMED_OUT) {
152 return;
153 }
154
155 assert(before == WAITING);
156 state_.store(LATE_DELIVERY, std::memory_order_release);
157 detail::futexWake(&state_, 1);
158 }
159
160 /// Waits until post() has been called in the current Baton lifetime.
161 /// May be called at most once during a Baton lifetime (construction
162 /// |reset until destruction|reset). If post is called before wait in
163 /// the current lifetime then this method returns immediately.
164 ///
165 /// The restriction that there can be at most one wait() per lifetime
166 /// could be relaxed somewhat without any perf or size regressions,
167 /// but by making this condition very restrictive we can provide better
168 /// checking in debug builds.
169 FOLLY_ALWAYS_INLINE
170 void wait(const WaitOptions& opt = wait_options()) noexcept {
171 if (try_wait()) {
172 return;
173 }
174
175 auto const deadline = std::chrono::steady_clock::time_point::max();
176 tryWaitSlow(deadline, opt);
177 }
178
179 /// Similar to wait, but doesn't block the thread if it hasn't been posted.
180 ///
181 /// try_wait has the following semantics:
182 /// - It is ok to call try_wait any number times on the same baton until
183 /// try_wait reports that the baton has been posted.
184 /// - It is ok to call timed_wait or wait on the same baton if try_wait
185 /// reports that baton hasn't been posted.
186 /// - If try_wait indicates that the baton has been posted, it is invalid to
187 /// call wait, try_wait or timed_wait on the same baton without resetting
188 ///
189 /// @return true if baton has been posted, false othewise
190 FOLLY_ALWAYS_INLINE bool try_wait() const noexcept {
191 return ready();
192 }
193
194 /// Similar to wait, but with a timeout. The thread is unblocked if the
195 /// timeout expires.
196 /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
197 /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
198 /// words, after try_wait_for the caller can't invoke
199 /// wait/try_wait/try_wait_for/try_wait_until
200 /// again on the same baton without resetting it.
201 ///
202 /// @param timeout Time until which the thread can block
203 /// @return true if the baton was posted to before timeout,
204 /// false otherwise
205 template <typename Rep, typename Period>
206 FOLLY_ALWAYS_INLINE bool try_wait_for(
207 const std::chrono::duration<Rep, Period>& timeout,
208 const WaitOptions& opt = wait_options()) noexcept {
209 if (try_wait()) {
210 return true;
211 }
212
213 auto const deadline = std::chrono::steady_clock::now() + timeout;
214 return tryWaitSlow(deadline, opt);
215 }
216
217 /// Similar to wait, but with a deadline. The thread is unblocked if the
218 /// deadline expires.
219 /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
220 /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
221 /// words, after try_wait_until the caller can't invoke
222 /// wait/try_wait/try_wait_for/try_wait_until
223 /// again on the same baton without resetting it.
224 ///
225 /// @param deadline Time until which the thread can block
226 /// @return true if the baton was posted to before deadline,
227 /// false otherwise
228 template <typename Clock, typename Duration>
229 FOLLY_ALWAYS_INLINE bool try_wait_until(
230 const std::chrono::time_point<Clock, Duration>& deadline,
231 const WaitOptions& opt = wait_options()) noexcept {
232 if (try_wait()) {
233 return true;
234 }
235
236 return tryWaitSlow(deadline, opt);
237 }
238
239 /// Alias to try_wait_for. Deprecated.
240 template <typename Rep, typename Period>
241 FOLLY_ALWAYS_INLINE bool timed_wait(
242 const std::chrono::duration<Rep, Period>& timeout) noexcept {
243 return try_wait_for(timeout);
244 }
245
246 /// Alias to try_wait_until. Deprecated.
247 template <typename Clock, typename Duration>
248 FOLLY_ALWAYS_INLINE bool timed_wait(
249 const std::chrono::time_point<Clock, Duration>& deadline) noexcept {
250 return try_wait_until(deadline);
251 }
252
253 private:
254 enum State : uint32_t {
255 INIT = 0,
256 EARLY_DELIVERY = 1,
257 WAITING = 2,
258 LATE_DELIVERY = 3,
259 TIMED_OUT = 4,
260 };
261
262 template <typename Clock, typename Duration>
263 FOLLY_NOINLINE bool tryWaitSlow(
264 const std::chrono::time_point<Clock, Duration>& deadline,
265 const WaitOptions& opt) noexcept {
266 switch (detail::spin_pause_until(deadline, opt, [=] { return ready(); })) {
267 case detail::spin_result::success:
268 return true;
269 case detail::spin_result::timeout:
270 return false;
271 case detail::spin_result::advance:
272 break;
273 }
274
275 if (!MayBlock) {
276 switch (detail::spin_yield_until(deadline, [=] { return ready(); })) {
277 case detail::spin_result::success:
278 return true;
279 case detail::spin_result::timeout:
280 return false;
281 case detail::spin_result::advance:
282 break;
283 }
284 }
285
286 // guess we have to block :(
287 uint32_t expected = INIT;
288 if (!state_.compare_exchange_strong(
289 expected,
290 WAITING,
291 std::memory_order_relaxed,
292 std::memory_order_relaxed)) {
293 // CAS failed, last minute reprieve
294 assert(expected == EARLY_DELIVERY);
295 // TODO: move the acquire to the compare_exchange failure load after C++17
296 std::atomic_thread_fence(std::memory_order_acquire);
297 return true;
298 }
299
300 while (true) {
301 auto rv = detail::MemoryIdler::futexWaitUntil(state_, WAITING, deadline);
302
303 // Awoken by the deadline passing.
304 if (rv == detail::FutexResult::TIMEDOUT) {
305 assert(deadline != (std::chrono::time_point<Clock, Duration>::max()));
306 state_.store(TIMED_OUT, std::memory_order_release);
307 return false;
308 }
309
310 // Probably awoken by a matching wake event, but could also by awoken
311 // by an asynchronous signal or by a spurious wakeup.
312 //
313 // state_ is the truth even if FUTEX_WAIT reported a matching
314 // FUTEX_WAKE, since we aren't using type-stable storage and we
315 // don't guarantee reuse. The scenario goes like this: thread
316 // A's last touch of a Baton is a call to wake(), which stores
317 // LATE_DELIVERY and gets an unlucky context switch before delivering
318 // the corresponding futexWake. Thread B sees LATE_DELIVERY
319 // without consuming a futex event, because it calls futexWait
320 // with an expected value of WAITING and hence doesn't go to sleep.
321 // B returns, so the Baton's memory is reused and becomes another
322 // Baton (or a reuse of this one). B calls futexWait on the new
323 // Baton lifetime, then A wakes up and delivers a spurious futexWake
324 // to the same memory location. B's futexWait will then report a
325 // consumed wake event even though state_ is still WAITING.
326 //
327 // It would be possible to add an extra state_ dance to communicate
328 // that the futexWake has been sent so that we can be sure to consume
329 // it before returning, but that would be a perf and complexity hit.
330 uint32_t s = state_.load(std::memory_order_acquire);
331 assert(s == WAITING || s == LATE_DELIVERY);
332 if (s == LATE_DELIVERY) {
333 return true;
334 }
335 }
336 }
337
338 detail::Futex<Atom> state_;
339};
340
341} // namespace folly
342