1 | /* |
2 | * Copyright 2014-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #pragma once |
18 | |
19 | #include <assert.h> |
20 | #include <errno.h> |
21 | #include <stdint.h> |
22 | #include <atomic> |
23 | #include <thread> |
24 | |
25 | #include <folly/Likely.h> |
26 | #include <folly/detail/Futex.h> |
27 | #include <folly/detail/MemoryIdler.h> |
28 | #include <folly/portability/Asm.h> |
29 | #include <folly/synchronization/WaitOptions.h> |
30 | #include <folly/synchronization/detail/Spin.h> |
31 | |
32 | namespace folly { |
33 | |
34 | /// A Baton allows a thread to block once and be awoken. Captures a |
35 | /// single handoff, and during its lifecycle (from construction/reset |
36 | /// to destruction/reset) a baton must either be post()ed and wait()ed |
37 | /// exactly once each, or not at all. |
38 | /// |
39 | /// Baton includes no internal padding, and is only 4 bytes in size. |
40 | /// Any alignment or padding to avoid false sharing is up to the user. |
41 | /// |
42 | /// This is basically a stripped-down semaphore that supports only a |
43 | /// single call to sem_post and a single call to sem_wait. |
44 | /// |
45 | /// The non-blocking version (MayBlock == false) provides more speed |
46 | /// by using only load acquire and store release operations in the |
47 | /// critical path, at the cost of disallowing blocking. |
48 | /// |
49 | /// The current posix semaphore sem_t isn't too bad, but this provides |
50 | /// more a bit more speed, inlining, smaller size, a guarantee that |
51 | /// the implementation won't change, and compatibility with |
52 | /// DeterministicSchedule. By having a much more restrictive |
53 | /// lifecycle we can also add a bunch of assertions that can help to |
54 | /// catch race conditions ahead of time. |
55 | template <bool MayBlock = true, template <typename> class Atom = std::atomic> |
56 | class Baton { |
57 | public: |
58 | FOLLY_ALWAYS_INLINE static constexpr WaitOptions wait_options() { |
59 | return {}; |
60 | } |
61 | |
62 | constexpr Baton() noexcept : state_(INIT) {} |
63 | |
64 | Baton(Baton const&) = delete; |
65 | Baton& operator=(Baton const&) = delete; |
66 | |
67 | /// It is an error to destroy a Baton on which a thread is currently |
68 | /// wait()ing. In practice this means that the waiter usually takes |
69 | /// responsibility for destroying the Baton. |
70 | ~Baton() noexcept { |
71 | // The docblock for this function says that it can't be called when |
72 | // there is a concurrent waiter. We assume a strong version of this |
73 | // requirement in which the caller must _know_ that this is true, they |
74 | // are not allowed to be merely lucky. If two threads are involved, |
75 | // the destroying thread must actually have synchronized with the |
76 | // waiting thread after wait() returned. To convey causality the the |
77 | // waiting thread must have used release semantics and the destroying |
78 | // thread must have used acquire semantics for that communication, |
79 | // so we are guaranteed to see the post-wait() value of state_, |
80 | // which cannot be WAITING. |
81 | // |
82 | // Note that since we only care about a single memory location, |
83 | // the only two plausible memory orders here are relaxed and seq_cst. |
84 | assert(state_.load(std::memory_order_relaxed) != WAITING); |
85 | } |
86 | |
87 | FOLLY_ALWAYS_INLINE bool ready() const noexcept { |
88 | auto s = state_.load(std::memory_order_acquire); |
89 | assert(s == INIT || s == EARLY_DELIVERY); |
90 | return LIKELY(s == EARLY_DELIVERY); |
91 | } |
92 | |
93 | /// Equivalent to destroying the Baton and creating a new one. It is |
94 | /// a bug to call this while there is a waiting thread, so in practice |
95 | /// the waiter will be the one that resets the baton. |
96 | void reset() noexcept { |
97 | // See ~Baton for a discussion about why relaxed is okay here |
98 | assert(state_.load(std::memory_order_relaxed) != WAITING); |
99 | |
100 | // We use a similar argument to justify the use of a relaxed store |
101 | // here. Since both wait() and post() are required to be called |
102 | // only once per lifetime, no thread can actually call those methods |
103 | // correctly after a reset() unless it synchronizes with the thread |
104 | // that performed the reset(). If a post() or wait() on another thread |
105 | // didn't synchronize, then regardless of what operation we performed |
106 | // here there would be a race on proper use of the Baton's spec |
107 | // (although not on any particular load and store). Put another way, |
108 | // we don't need to synchronize here because anybody that might rely |
109 | // on such synchronization is required by the baton rules to perform |
110 | // an additional synchronization that has the desired effect anyway. |
111 | // |
112 | // There is actually a similar argument to be made about the |
113 | // constructor, in which the fenceless constructor initialization |
114 | // of state_ is piggybacked on whatever synchronization mechanism |
115 | // distributes knowledge of the Baton's existence |
116 | state_.store(INIT, std::memory_order_relaxed); |
117 | } |
118 | |
119 | /// Causes wait() to wake up. For each lifetime of a Baton (where a |
120 | /// lifetime starts at construction or reset() and ends at |
121 | /// destruction or reset()) there can be at most one call to post(), |
122 | /// in the single poster version. Any thread may call post(). |
123 | void post() noexcept { |
124 | if (!MayBlock) { |
125 | /// Spin-only version |
126 | /// |
127 | assert( |
128 | ((1 << state_.load(std::memory_order_relaxed)) & |
129 | ((1 << INIT) | (1 << EARLY_DELIVERY))) != 0); |
130 | state_.store(EARLY_DELIVERY, std::memory_order_release); |
131 | return; |
132 | } |
133 | |
134 | /// May-block versions |
135 | /// |
136 | uint32_t before = state_.load(std::memory_order_acquire); |
137 | |
138 | assert(before == INIT || before == WAITING || before == TIMED_OUT); |
139 | |
140 | if (before == INIT && |
141 | state_.compare_exchange_strong( |
142 | before, |
143 | EARLY_DELIVERY, |
144 | std::memory_order_release, |
145 | std::memory_order_relaxed)) { |
146 | return; |
147 | } |
148 | |
149 | assert(before == WAITING || before == TIMED_OUT); |
150 | |
151 | if (before == TIMED_OUT) { |
152 | return; |
153 | } |
154 | |
155 | assert(before == WAITING); |
156 | state_.store(LATE_DELIVERY, std::memory_order_release); |
157 | detail::futexWake(&state_, 1); |
158 | } |
159 | |
160 | /// Waits until post() has been called in the current Baton lifetime. |
161 | /// May be called at most once during a Baton lifetime (construction |
162 | /// |reset until destruction|reset). If post is called before wait in |
163 | /// the current lifetime then this method returns immediately. |
164 | /// |
165 | /// The restriction that there can be at most one wait() per lifetime |
166 | /// could be relaxed somewhat without any perf or size regressions, |
167 | /// but by making this condition very restrictive we can provide better |
168 | /// checking in debug builds. |
169 | FOLLY_ALWAYS_INLINE |
170 | void wait(const WaitOptions& opt = wait_options()) noexcept { |
171 | if (try_wait()) { |
172 | return; |
173 | } |
174 | |
175 | auto const deadline = std::chrono::steady_clock::time_point::max(); |
176 | tryWaitSlow(deadline, opt); |
177 | } |
178 | |
179 | /// Similar to wait, but doesn't block the thread if it hasn't been posted. |
180 | /// |
181 | /// try_wait has the following semantics: |
182 | /// - It is ok to call try_wait any number times on the same baton until |
183 | /// try_wait reports that the baton has been posted. |
184 | /// - It is ok to call timed_wait or wait on the same baton if try_wait |
185 | /// reports that baton hasn't been posted. |
186 | /// - If try_wait indicates that the baton has been posted, it is invalid to |
187 | /// call wait, try_wait or timed_wait on the same baton without resetting |
188 | /// |
189 | /// @return true if baton has been posted, false othewise |
190 | FOLLY_ALWAYS_INLINE bool try_wait() const noexcept { |
191 | return ready(); |
192 | } |
193 | |
194 | /// Similar to wait, but with a timeout. The thread is unblocked if the |
195 | /// timeout expires. |
196 | /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed |
197 | /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other |
198 | /// words, after try_wait_for the caller can't invoke |
199 | /// wait/try_wait/try_wait_for/try_wait_until |
200 | /// again on the same baton without resetting it. |
201 | /// |
202 | /// @param timeout Time until which the thread can block |
203 | /// @return true if the baton was posted to before timeout, |
204 | /// false otherwise |
205 | template <typename Rep, typename Period> |
206 | FOLLY_ALWAYS_INLINE bool try_wait_for( |
207 | const std::chrono::duration<Rep, Period>& timeout, |
208 | const WaitOptions& opt = wait_options()) noexcept { |
209 | if (try_wait()) { |
210 | return true; |
211 | } |
212 | |
213 | auto const deadline = std::chrono::steady_clock::now() + timeout; |
214 | return tryWaitSlow(deadline, opt); |
215 | } |
216 | |
217 | /// Similar to wait, but with a deadline. The thread is unblocked if the |
218 | /// deadline expires. |
219 | /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed |
220 | /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other |
221 | /// words, after try_wait_until the caller can't invoke |
222 | /// wait/try_wait/try_wait_for/try_wait_until |
223 | /// again on the same baton without resetting it. |
224 | /// |
225 | /// @param deadline Time until which the thread can block |
226 | /// @return true if the baton was posted to before deadline, |
227 | /// false otherwise |
228 | template <typename Clock, typename Duration> |
229 | FOLLY_ALWAYS_INLINE bool try_wait_until( |
230 | const std::chrono::time_point<Clock, Duration>& deadline, |
231 | const WaitOptions& opt = wait_options()) noexcept { |
232 | if (try_wait()) { |
233 | return true; |
234 | } |
235 | |
236 | return tryWaitSlow(deadline, opt); |
237 | } |
238 | |
239 | /// Alias to try_wait_for. Deprecated. |
240 | template <typename Rep, typename Period> |
241 | FOLLY_ALWAYS_INLINE bool timed_wait( |
242 | const std::chrono::duration<Rep, Period>& timeout) noexcept { |
243 | return try_wait_for(timeout); |
244 | } |
245 | |
246 | /// Alias to try_wait_until. Deprecated. |
247 | template <typename Clock, typename Duration> |
248 | FOLLY_ALWAYS_INLINE bool timed_wait( |
249 | const std::chrono::time_point<Clock, Duration>& deadline) noexcept { |
250 | return try_wait_until(deadline); |
251 | } |
252 | |
253 | private: |
254 | enum State : uint32_t { |
255 | INIT = 0, |
256 | EARLY_DELIVERY = 1, |
257 | WAITING = 2, |
258 | LATE_DELIVERY = 3, |
259 | TIMED_OUT = 4, |
260 | }; |
261 | |
262 | template <typename Clock, typename Duration> |
263 | FOLLY_NOINLINE bool tryWaitSlow( |
264 | const std::chrono::time_point<Clock, Duration>& deadline, |
265 | const WaitOptions& opt) noexcept { |
266 | switch (detail::spin_pause_until(deadline, opt, [=] { return ready(); })) { |
267 | case detail::spin_result::success: |
268 | return true; |
269 | case detail::spin_result::timeout: |
270 | return false; |
271 | case detail::spin_result::advance: |
272 | break; |
273 | } |
274 | |
275 | if (!MayBlock) { |
276 | switch (detail::spin_yield_until(deadline, [=] { return ready(); })) { |
277 | case detail::spin_result::success: |
278 | return true; |
279 | case detail::spin_result::timeout: |
280 | return false; |
281 | case detail::spin_result::advance: |
282 | break; |
283 | } |
284 | } |
285 | |
286 | // guess we have to block :( |
287 | uint32_t expected = INIT; |
288 | if (!state_.compare_exchange_strong( |
289 | expected, |
290 | WAITING, |
291 | std::memory_order_relaxed, |
292 | std::memory_order_relaxed)) { |
293 | // CAS failed, last minute reprieve |
294 | assert(expected == EARLY_DELIVERY); |
295 | // TODO: move the acquire to the compare_exchange failure load after C++17 |
296 | std::atomic_thread_fence(std::memory_order_acquire); |
297 | return true; |
298 | } |
299 | |
300 | while (true) { |
301 | auto rv = detail::MemoryIdler::futexWaitUntil(state_, WAITING, deadline); |
302 | |
303 | // Awoken by the deadline passing. |
304 | if (rv == detail::FutexResult::TIMEDOUT) { |
305 | assert(deadline != (std::chrono::time_point<Clock, Duration>::max())); |
306 | state_.store(TIMED_OUT, std::memory_order_release); |
307 | return false; |
308 | } |
309 | |
310 | // Probably awoken by a matching wake event, but could also by awoken |
311 | // by an asynchronous signal or by a spurious wakeup. |
312 | // |
313 | // state_ is the truth even if FUTEX_WAIT reported a matching |
314 | // FUTEX_WAKE, since we aren't using type-stable storage and we |
315 | // don't guarantee reuse. The scenario goes like this: thread |
316 | // A's last touch of a Baton is a call to wake(), which stores |
317 | // LATE_DELIVERY and gets an unlucky context switch before delivering |
318 | // the corresponding futexWake. Thread B sees LATE_DELIVERY |
319 | // without consuming a futex event, because it calls futexWait |
320 | // with an expected value of WAITING and hence doesn't go to sleep. |
321 | // B returns, so the Baton's memory is reused and becomes another |
322 | // Baton (or a reuse of this one). B calls futexWait on the new |
323 | // Baton lifetime, then A wakes up and delivers a spurious futexWake |
324 | // to the same memory location. B's futexWait will then report a |
325 | // consumed wake event even though state_ is still WAITING. |
326 | // |
327 | // It would be possible to add an extra state_ dance to communicate |
328 | // that the futexWake has been sent so that we can be sure to consume |
329 | // it before returning, but that would be a perf and complexity hit. |
330 | uint32_t s = state_.load(std::memory_order_acquire); |
331 | assert(s == WAITING || s == LATE_DELIVERY); |
332 | if (s == LATE_DELIVERY) { |
333 | return true; |
334 | } |
335 | } |
336 | } |
337 | |
338 | detail::Futex<Atom> state_; |
339 | }; |
340 | |
341 | } // namespace folly |
342 | |