1 | /* |
2 | * Copyright 2013-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #include <folly/detail/Futex.h> |
17 | #include <folly/ScopeGuard.h> |
18 | #include <folly/hash/Hash.h> |
19 | #include <folly/portability/SysSyscall.h> |
20 | #include <stdint.h> |
21 | #include <string.h> |
22 | #include <array> |
23 | #include <cerrno> |
24 | |
25 | #include <folly/synchronization/ParkingLot.h> |
26 | |
27 | #ifdef __linux__ |
28 | #include <linux/futex.h> |
29 | #endif |
30 | |
31 | using namespace std::chrono; |
32 | |
33 | namespace folly { |
34 | namespace detail { |
35 | |
36 | namespace { |
37 | |
38 | //////////////////////////////////////////////////// |
39 | // native implementation using the futex() syscall |
40 | |
41 | #ifdef __linux__ |
42 | |
43 | /// Certain toolchains (like Android's) don't include the full futex API in |
44 | /// their headers even though they support it. Make sure we have our constants |
45 | /// even if the headers don't have them. |
46 | #ifndef FUTEX_WAIT_BITSET |
47 | #define FUTEX_WAIT_BITSET 9 |
48 | #endif |
49 | #ifndef FUTEX_WAKE_BITSET |
50 | #define FUTEX_WAKE_BITSET 10 |
51 | #endif |
52 | #ifndef FUTEX_PRIVATE_FLAG |
53 | #define FUTEX_PRIVATE_FLAG 128 |
54 | #endif |
55 | #ifndef FUTEX_CLOCK_REALTIME |
56 | #define FUTEX_CLOCK_REALTIME 256 |
57 | #endif |
58 | |
59 | int nativeFutexWake(const void* addr, int count, uint32_t wakeMask) { |
60 | int rv = syscall( |
61 | __NR_futex, |
62 | addr, /* addr1 */ |
63 | FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG, /* op */ |
64 | count, /* val */ |
65 | nullptr, /* timeout */ |
66 | nullptr, /* addr2 */ |
67 | wakeMask); /* val3 */ |
68 | |
69 | /* NOTE: we ignore errors on wake for the case of a futex |
70 | guarding its own destruction, similar to this |
71 | glibc bug with sem_post/sem_wait: |
72 | https://sourceware.org/bugzilla/show_bug.cgi?id=12674 */ |
73 | if (rv < 0) { |
74 | return 0; |
75 | } |
76 | return rv; |
77 | } |
78 | |
79 | template <class Clock> |
80 | struct timespec timeSpecFromTimePoint(time_point<Clock> absTime) { |
81 | auto epoch = absTime.time_since_epoch(); |
82 | if (epoch.count() < 0) { |
83 | // kernel timespec_valid requires non-negative seconds and nanos in [0,1G) |
84 | epoch = Clock::duration::zero(); |
85 | } |
86 | |
87 | // timespec-safe seconds and nanoseconds; |
88 | // chrono::{nano,}seconds are `long long int` |
89 | // whereas timespec uses smaller types |
90 | using time_t_seconds = duration<std::time_t, seconds::period>; |
91 | using long_nanos = duration<long int, nanoseconds::period>; |
92 | |
93 | auto secs = duration_cast<time_t_seconds>(epoch); |
94 | auto nanos = duration_cast<long_nanos>(epoch - secs); |
95 | struct timespec result = {secs.count(), nanos.count()}; |
96 | return result; |
97 | } |
98 | |
99 | FutexResult nativeFutexWaitImpl( |
100 | const void* addr, |
101 | uint32_t expected, |
102 | system_clock::time_point const* absSystemTime, |
103 | steady_clock::time_point const* absSteadyTime, |
104 | uint32_t waitMask) { |
105 | assert(absSystemTime == nullptr || absSteadyTime == nullptr); |
106 | |
107 | int op = FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG; |
108 | struct timespec ts; |
109 | struct timespec* timeout = nullptr; |
110 | |
111 | if (absSystemTime != nullptr) { |
112 | op |= FUTEX_CLOCK_REALTIME; |
113 | ts = timeSpecFromTimePoint(*absSystemTime); |
114 | timeout = &ts; |
115 | } else if (absSteadyTime != nullptr) { |
116 | ts = timeSpecFromTimePoint(*absSteadyTime); |
117 | timeout = &ts; |
118 | } |
119 | |
120 | // Unlike FUTEX_WAIT, FUTEX_WAIT_BITSET requires an absolute timeout |
121 | // value - http://locklessinc.com/articles/futex_cheat_sheet/ |
122 | int rv = syscall( |
123 | __NR_futex, |
124 | addr, /* addr1 */ |
125 | op, /* op */ |
126 | expected, /* val */ |
127 | timeout, /* timeout */ |
128 | nullptr, /* addr2 */ |
129 | waitMask); /* val3 */ |
130 | |
131 | if (rv == 0) { |
132 | return FutexResult::AWOKEN; |
133 | } else { |
134 | switch (errno) { |
135 | case ETIMEDOUT: |
136 | assert(timeout != nullptr); |
137 | return FutexResult::TIMEDOUT; |
138 | case EINTR: |
139 | return FutexResult::INTERRUPTED; |
140 | case EWOULDBLOCK: |
141 | return FutexResult::VALUE_CHANGED; |
142 | default: |
143 | assert(false); |
144 | // EINVAL, EACCESS, or EFAULT. EINVAL means there was an invalid |
145 | // op (should be impossible) or an invalid timeout (should have |
146 | // been sanitized by timeSpecFromTimePoint). EACCESS or EFAULT |
147 | // means *addr points to invalid memory, which is unlikely because |
148 | // the caller should have segfaulted already. We can either |
149 | // crash, or return a value that lets the process continue for |
150 | // a bit. We choose the latter. VALUE_CHANGED probably turns the |
151 | // caller into a spin lock. |
152 | return FutexResult::VALUE_CHANGED; |
153 | } |
154 | } |
155 | } |
156 | |
157 | #endif // __linux__ |
158 | |
159 | /////////////////////////////////////////////////////// |
160 | // compatibility implementation using standard C++ API |
161 | |
162 | using Lot = ParkingLot<uint32_t>; |
163 | Lot parkingLot; |
164 | |
165 | int emulatedFutexWake(const void* addr, int count, uint32_t waitMask) { |
166 | int woken = 0; |
167 | parkingLot.unpark(addr, [&](const uint32_t& mask) { |
168 | if ((mask & waitMask) == 0) { |
169 | return UnparkControl::RetainContinue; |
170 | } |
171 | assert(count > 0); |
172 | count--; |
173 | woken++; |
174 | return count > 0 ? UnparkControl::RemoveContinue |
175 | : UnparkControl::RemoveBreak; |
176 | }); |
177 | return woken; |
178 | } |
179 | |
180 | template <typename F> |
181 | FutexResult emulatedFutexWaitImpl( |
182 | F* futex, |
183 | uint32_t expected, |
184 | system_clock::time_point const* absSystemTime, |
185 | steady_clock::time_point const* absSteadyTime, |
186 | uint32_t waitMask) { |
187 | static_assert( |
188 | std::is_same<F, const Futex<std::atomic>>::value || |
189 | std::is_same<F, const Futex<EmulatedFutexAtomic>>::value, |
190 | "Type F must be either Futex<std::atomic> or Futex<EmulatedFutexAtomic>" ); |
191 | ParkResult res; |
192 | if (absSystemTime) { |
193 | res = parkingLot.park_until( |
194 | futex, |
195 | waitMask, |
196 | [&] { return *futex == expected; }, |
197 | [] {}, |
198 | *absSystemTime); |
199 | } else if (absSteadyTime) { |
200 | res = parkingLot.park_until( |
201 | futex, |
202 | waitMask, |
203 | [&] { return *futex == expected; }, |
204 | [] {}, |
205 | *absSteadyTime); |
206 | } else { |
207 | res = parkingLot.park( |
208 | futex, waitMask, [&] { return *futex == expected; }, [] {}); |
209 | } |
210 | switch (res) { |
211 | case ParkResult::Skip: |
212 | return FutexResult::VALUE_CHANGED; |
213 | case ParkResult::Unpark: |
214 | return FutexResult::AWOKEN; |
215 | case ParkResult::Timeout: |
216 | return FutexResult::TIMEDOUT; |
217 | } |
218 | |
219 | return FutexResult::INTERRUPTED; |
220 | } |
221 | |
222 | } // namespace |
223 | |
224 | ///////////////////////////////// |
225 | // Futex<> overloads |
226 | |
227 | int futexWakeImpl( |
228 | const Futex<std::atomic>* futex, |
229 | int count, |
230 | uint32_t wakeMask) { |
231 | #ifdef __linux__ |
232 | return nativeFutexWake(futex, count, wakeMask); |
233 | #else |
234 | return emulatedFutexWake(futex, count, wakeMask); |
235 | #endif |
236 | } |
237 | |
238 | int futexWakeImpl( |
239 | const Futex<EmulatedFutexAtomic>* futex, |
240 | int count, |
241 | uint32_t wakeMask) { |
242 | return emulatedFutexWake(futex, count, wakeMask); |
243 | } |
244 | |
245 | FutexResult futexWaitImpl( |
246 | const Futex<std::atomic>* futex, |
247 | uint32_t expected, |
248 | system_clock::time_point const* absSystemTime, |
249 | steady_clock::time_point const* absSteadyTime, |
250 | uint32_t waitMask) { |
251 | #ifdef __linux__ |
252 | return nativeFutexWaitImpl( |
253 | futex, expected, absSystemTime, absSteadyTime, waitMask); |
254 | #else |
255 | return emulatedFutexWaitImpl( |
256 | futex, expected, absSystemTime, absSteadyTime, waitMask); |
257 | #endif |
258 | } |
259 | |
260 | FutexResult futexWaitImpl( |
261 | const Futex<EmulatedFutexAtomic>* futex, |
262 | uint32_t expected, |
263 | system_clock::time_point const* absSystemTime, |
264 | steady_clock::time_point const* absSteadyTime, |
265 | uint32_t waitMask) { |
266 | return emulatedFutexWaitImpl( |
267 | futex, expected, absSystemTime, absSteadyTime, waitMask); |
268 | } |
269 | |
270 | } // namespace detail |
271 | } // namespace folly |
272 | |