| 1 | /* POSIX reader--writer lock: core parts. | 
|---|
| 2 | Copyright (C) 2016-2020 Free Software Foundation, Inc. | 
|---|
| 3 | This file is part of the GNU C Library. | 
|---|
| 4 |  | 
|---|
| 5 | The GNU C Library is free software; you can redistribute it and/or | 
|---|
| 6 | modify it under the terms of the GNU Lesser General Public | 
|---|
| 7 | License as published by the Free Software Foundation; either | 
|---|
| 8 | version 2.1 of the License, or (at your option) any later version. | 
|---|
| 9 |  | 
|---|
| 10 | The GNU C Library is distributed in the hope that it will be useful, | 
|---|
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU | 
|---|
| 13 | Lesser General Public License for more details. | 
|---|
| 14 |  | 
|---|
| 15 | You should have received a copy of the GNU Lesser General Public | 
|---|
| 16 | License along with the GNU C Library; if not, see | 
|---|
| 17 | <https://www.gnu.org/licenses/>.  */ | 
|---|
| 18 |  | 
|---|
| 19 | #include <errno.h> | 
|---|
| 20 | #include <sysdep.h> | 
|---|
| 21 | #include <pthread.h> | 
|---|
| 22 | #include <pthreadP.h> | 
|---|
| 23 | #include <sys/time.h> | 
|---|
| 24 | #include <stap-probe.h> | 
|---|
| 25 | #include <atomic.h> | 
|---|
| 26 | #include <futex-internal.h> | 
|---|
| 27 | #include <time.h> | 
|---|
| 28 |  | 
|---|
| 29 |  | 
|---|
| 30 | /* A reader--writer lock that fulfills the POSIX requirements (but operations | 
|---|
| 31 | on this lock are not necessarily full barriers, as one may interpret the | 
|---|
| 32 | POSIX requirement about "synchronizing memory").  All critical sections are | 
|---|
| 33 | in a total order, writers synchronize with prior writers and readers, and | 
|---|
| 34 | readers synchronize with prior writers. | 
|---|
| 35 |  | 
|---|
| 36 | A thread is allowed to acquire a read lock recursively (i.e., have rdlock | 
|---|
| 37 | critical sections that overlap in sequenced-before) unless the kind of the | 
|---|
| 38 | rwlock is set to PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP. | 
|---|
| 39 |  | 
|---|
| 40 | This lock is built so that workloads of mostly readers can be executed with | 
|---|
| 41 | low runtime overheads.  This matches that the default kind of the lock is | 
|---|
| 42 | PTHREAD_RWLOCK_PREFER_READER_NP.  Acquiring a read lock requires a single | 
|---|
| 43 | atomic addition if the lock is or was previously acquired by other | 
|---|
| 44 | readers; releasing the lock is a single CAS if there are no concurrent | 
|---|
| 45 | writers. | 
|---|
| 46 | Workloads consisting of mostly writers are of secondary importance. | 
|---|
| 47 | An uncontended write lock acquisition is as fast as for a normal | 
|---|
| 48 | exclusive mutex but writer contention is somewhat more costly due to | 
|---|
| 49 | keeping track of the exact number of writers.  If the rwlock kind requests | 
|---|
| 50 | writers to be preferred (i.e., PTHREAD_RWLOCK_PREFER_WRITER_NP or the | 
|---|
| 51 | no-recursive-readers variant of it), then writer--to--writer lock ownership | 
|---|
| 52 | hand-over is fairly fast and bypasses lock acquisition attempts by readers. | 
|---|
| 53 | The costs of lock ownership transfer between readers and writers vary.  If | 
|---|
| 54 | the program asserts that there are no recursive readers and writers are | 
|---|
| 55 | preferred, then write lock acquisition attempts will block subsequent read | 
|---|
| 56 | lock acquisition attempts, so that new incoming readers do not prolong a | 
|---|
| 57 | phase in which readers have acquired the lock. | 
|---|
| 58 |  | 
|---|
| 59 | The main components of the rwlock are a writer-only lock that allows only | 
|---|
| 60 | one of the concurrent writers to be the primary writer, and a | 
|---|
| 61 | single-writer-multiple-readers lock that decides between read phases, in | 
|---|
| 62 | which readers have acquired the rwlock, and write phases in which a primary | 
|---|
| 63 | writer or a sequence of different primary writers have acquired the rwlock. | 
|---|
| 64 |  | 
|---|
| 65 | The single-writer-multiple-readers lock is the central piece of state | 
|---|
| 66 | describing the rwlock and is encoded in the __readers field (see below for | 
|---|
| 67 | a detailed explanation): | 
|---|
| 68 |  | 
|---|
| 69 | State WP  WL  R   RW  Notes | 
|---|
| 70 | --------------------------- | 
|---|
| 71 | #1    0   0   0   0   Lock is idle (and in a read phase). | 
|---|
| 72 | #2    0   0   >0  0   Readers have acquired the lock. | 
|---|
| 73 | #3    0   1   0   0   Lock is not acquired; a writer will try to start a | 
|---|
| 74 | write phase. | 
|---|
| 75 | #4    0   1   >0  0   Readers have acquired the lock; a writer is waiting | 
|---|
| 76 | and explicit hand-over to the writer is required. | 
|---|
| 77 | #4a   0   1   >0  1   Same as #4 except that there are further readers | 
|---|
| 78 | waiting because the writer is to be preferred. | 
|---|
| 79 | #5    1   0   0   0   Lock is idle (and in a write phase). | 
|---|
| 80 | #6    1   0   >0  0   Write phase; readers will try to start a read phase | 
|---|
| 81 | (requires explicit hand-over to all readers that | 
|---|
| 82 | do not start the read phase). | 
|---|
| 83 | #7    1   1   0   0   Lock is acquired by a writer. | 
|---|
| 84 | #8    1   1   >0  0   Lock acquired by a writer and readers are waiting; | 
|---|
| 85 | explicit hand-over to the readers is required. | 
|---|
| 86 |  | 
|---|
| 87 | WP (PTHREAD_RWLOCK_WRPHASE) is true if the lock is in a write phase, so | 
|---|
| 88 | potentially acquired by a primary writer. | 
|---|
| 89 | WL (PTHREAD_RWLOCK_WRLOCKED) is true if there is a primary writer (i.e., | 
|---|
| 90 | the thread that was able to set this bit from false to true). | 
|---|
| 91 | R (all bits in __readers except the number of least-significant bits | 
|---|
| 92 | denoted in PTHREAD_RWLOCK_READER_SHIFT) is the number of readers that have | 
|---|
| 93 | or are trying to acquire the lock.  There may be more readers waiting if | 
|---|
| 94 | writers are preferred and there will be no recursive readers, in which | 
|---|
| 95 | case RW (PTHREAD_RWLOCK_RWAITING) is true in state #4a. | 
|---|
| 96 |  | 
|---|
| 97 | We want to block using futexes but using __readers as a futex word directly | 
|---|
| 98 | is not a good solution.  First, we want to wait on different conditions | 
|---|
| 99 | such as waiting for a phase change vs. waiting for the primary writer to | 
|---|
| 100 | release the writer-only lock.  Second, the number of readers could change | 
|---|
| 101 | frequently, which would make it likely that a writer's futex_wait fails | 
|---|
| 102 | frequently too because the expected value does not match the value of | 
|---|
| 103 | __readers anymore. | 
|---|
| 104 | Therefore, we split out the futex words into the __wrphase_futex and | 
|---|
| 105 | __writers_futex fields.  The former tracks the value of the WP bit and is | 
|---|
| 106 | changed after changing WP by the thread that changes WP.  However, because | 
|---|
| 107 | of the POSIX requirements regarding mutex/rwlock destruction (i.e., that | 
|---|
| 108 | destroying a rwlock is allowed as soon as no thread has acquired or will | 
|---|
| 109 | acquire the lock), we have to be careful and hand over lock ownership (via | 
|---|
| 110 | a phase change) carefully to those threads waiting.  Specifically, we must | 
|---|
| 111 | prevent a situation in which we are not quite sure whether we still have | 
|---|
| 112 | to unblock another thread through a change to memory (executing a | 
|---|
| 113 | futex_wake on a former futex word that is now used for something else is | 
|---|
| 114 | fine). | 
|---|
| 115 | The scheme we use for __wrphase_futex is that waiting threads that may | 
|---|
| 116 | use the futex word to block now all have to use the futex word to block; it | 
|---|
| 117 | is not allowed to take the short-cut and spin-wait on __readers because | 
|---|
| 118 | then the waking thread cannot just make one final change to memory to | 
|---|
| 119 | unblock all potentially waiting threads.  If, for example, a reader | 
|---|
| 120 | increments R in states #7 or #8, it has to then block until __wrphase_futex | 
|---|
| 121 | is 0 and it can confirm that the value of 0 was stored by the primary | 
|---|
| 122 | writer; in turn, the primary writer has to change to a read phase too when | 
|---|
| 123 | releasing WL (i.e., to state #2), and it must change __wrphase_futex to 0 | 
|---|
| 124 | as the next step.  This ensures that the waiting reader will not be able to | 
|---|
| 125 | acquire, release, and then destroy the lock concurrently with the pending | 
|---|
| 126 | futex unblock operations by the former primary writer.  This scheme is | 
|---|
| 127 | called explicit hand-over in what follows. | 
|---|
| 128 | Note that waiting threads can cancel waiting only if explicit hand-over has | 
|---|
| 129 | not yet started (e.g., if __readers is still in states #7 or #8 in the | 
|---|
| 130 | example above). | 
|---|
| 131 |  | 
|---|
| 132 | Writers determine the primary writer through WL.  Blocking using futexes | 
|---|
| 133 | is performed using __writers_futex as a futex word; primary writers will | 
|---|
| 134 | enable waiting on this futex by setting it to 1 after they acquired the WL | 
|---|
| 135 | bit and will disable waiting by setting it to 0 before they release WL. | 
|---|
| 136 | This leaves small windows where blocking using futexes is not possible | 
|---|
| 137 | although a primary writer exists, but in turn decreases complexity of the | 
|---|
| 138 | writer--writer synchronization and does not affect correctness. | 
|---|
| 139 | If writers are preferred, writers can hand over WL directly to other | 
|---|
| 140 | waiting writers that registered by incrementing __writers:  If the primary | 
|---|
| 141 | writer can CAS __writers from a non-zero value to the same value with the | 
|---|
| 142 | PTHREAD_RWLOCK_WRHANDOVER bit set, it effectively transfers WL ownership | 
|---|
| 143 | to one of the registered waiting writers and does not reset WL; in turn, | 
|---|
| 144 | a registered writer that can clear PTHREAD_RWLOCK_WRHANDOVER using a CAS | 
|---|
| 145 | then takes over WL.  Note that registered waiting writers can cancel | 
|---|
| 146 | waiting by decrementing __writers, but the last writer to unregister must | 
|---|
| 147 | become the primary writer if PTHREAD_RWLOCK_WRHANDOVER is set. | 
|---|
| 148 | Also note that adding another state/bit to signal potential writer--writer | 
|---|
| 149 | contention (e.g., as done in the normal mutex algorithm) would not be | 
|---|
| 150 | helpful because we would have to conservatively assume that there is in | 
|---|
| 151 | fact no other writer, and wake up readers too. | 
|---|
| 152 |  | 
|---|
| 153 | To avoid having to call futex_wake when no thread uses __wrphase_futex or | 
|---|
| 154 | __writers_futex, threads will set the PTHREAD_RWLOCK_FUTEX_USED bit in the | 
|---|
| 155 | respective futex words before waiting on it (using a CAS so it will only be | 
|---|
| 156 | set if in a state in which waiting would be possible).  In the case of | 
|---|
| 157 | __writers_futex, we wake only one thread but several threads may share | 
|---|
| 158 | PTHREAD_RWLOCK_FUTEX_USED, so we must assume that there are still others. | 
|---|
| 159 | This is similar to what we do in pthread_mutex_lock.  We do not need to | 
|---|
| 160 | do this for __wrphase_futex because there, we always wake all waiting | 
|---|
| 161 | threads. | 
|---|
| 162 |  | 
|---|
| 163 | Blocking in the state #4a simply uses __readers as futex word.  This | 
|---|
| 164 | simplifies the algorithm but suffers from some of the drawbacks discussed | 
|---|
| 165 | before, though not to the same extent because R can only decrease in this | 
|---|
| 166 | state, so the number of potentially failing futex_wait attempts will be | 
|---|
| 167 | bounded.  All threads moving from state #4a to another state must wake | 
|---|
| 168 | up threads blocked on the __readers futex. | 
|---|
| 169 |  | 
|---|
| 170 | The ordering invariants that we have to take care of in the implementation | 
|---|
| 171 | are primarily those necessary for a reader--writer lock; this is rather | 
|---|
| 172 | straightforward and happens during write/read phase switching (potentially | 
|---|
| 173 | through explicit hand-over), and between writers through synchronization | 
|---|
| 174 | involving the PTHREAD_RWLOCK_WRLOCKED or PTHREAD_RWLOCK_WRHANDOVER bits. | 
|---|
| 175 | Additionally, we need to take care that modifications of __writers_futex | 
|---|
| 176 | and __wrphase_futex (e.g., by otherwise unordered readers) take place in | 
|---|
| 177 | the writer critical sections or read/write phases, respectively, and that | 
|---|
| 178 | explicit hand-over observes stores from the previous phase.  How this is | 
|---|
| 179 | done is explained in more detail in comments in the code. | 
|---|
| 180 |  | 
|---|
| 181 | Many of the accesses to the futex words just need relaxed MO.  This is | 
|---|
| 182 | possible because we essentially drive both the core rwlock synchronization | 
|---|
| 183 | and the futex synchronization in parallel.  For example, an unlock will | 
|---|
| 184 | unlock the rwlock and take part in the futex synchronization (using | 
|---|
| 185 | PTHREAD_RWLOCK_FUTEX_USED, see above); even if they are not tightly | 
|---|
| 186 | ordered in some way, the futex synchronization ensures that there are no | 
|---|
| 187 | lost wake-ups, and woken threads will then eventually see the most recent | 
|---|
| 188 | state of the rwlock.  IOW, waiting threads will always be woken up, while | 
|---|
| 189 | not being able to wait using futexes (which can happen) is harmless; in | 
|---|
| 190 | turn, this means that waiting threads don't need special ordering wrt. | 
|---|
| 191 | waking threads. | 
|---|
| 192 |  | 
|---|
| 193 | The futex synchronization consists of the three-state futex word: | 
|---|
| 194 | (1) cannot block on it, (2) can block on it, and (3) there might be a | 
|---|
| 195 | thread blocked on it (i.e., with PTHREAD_RWLOCK_FUTEX_USED set). | 
|---|
| 196 | Relaxed-MO atomic read-modify-write operations are sufficient to maintain | 
|---|
| 197 | this (e.g., using a CAS to go from (2) to (3) but not from (1) to (3)), | 
|---|
| 198 | but we need ordering of the futex word modifications by the waking threads | 
|---|
| 199 | so that they collectively make correct state changes between (1)-(3). | 
|---|
| 200 | The futex-internal synchronization (i.e., the conceptual critical sections | 
|---|
| 201 | around futex operations in the kernel) then ensures that even an | 
|---|
| 202 | unconstrained load (i.e., relaxed MO) inside of futex_wait will not lead to | 
|---|
| 203 | lost wake-ups because either the waiting thread will see the change from | 
|---|
| 204 | (3) to (1) when a futex_wake came first, or this futex_wake will wake this | 
|---|
| 205 | waiting thread because the waiting thread came first. | 
|---|
| 206 |  | 
|---|
| 207 |  | 
|---|
| 208 | POSIX allows but does not require rwlock acquisitions to be a cancellation | 
|---|
| 209 | point.  We do not support cancellation. | 
|---|
| 210 |  | 
|---|
| 211 | TODO We do not try to elide any read or write lock acquisitions currently. | 
|---|
| 212 | While this would be possible, it is unclear whether HTM performance is | 
|---|
| 213 | currently predictable enough and our runtime tuning is good enough at | 
|---|
| 214 | deciding when to use elision so that enabling it would lead to consistently | 
|---|
| 215 | better performance.  */ | 
|---|
| 216 |  | 
|---|
| 217 |  | 
|---|
| 218 | static int | 
|---|
| 219 | __pthread_rwlock_get_private (pthread_rwlock_t *rwlock) | 
|---|
| 220 | { | 
|---|
| 221 | return rwlock->__data.__shared != 0 ? FUTEX_SHARED : FUTEX_PRIVATE; | 
|---|
| 222 | } | 
|---|
| 223 |  | 
|---|
/* Release a read lock on RWLOCK: decrement the reader count and, if we were
   the last reader while a primary writer is registered, hand lock ownership
   over by starting a write phase.  Must only be called by a thread that
   currently holds a read lock.  */
static __always_inline void
__pthread_rwlock_rdunlock (pthread_rwlock_t *rwlock)
{
  int private = __pthread_rwlock_get_private (rwlock);
  /* We decrease the number of readers, and if we are the last reader and
     there is a primary writer, we start a write phase.  We use a CAS to
     make this atomic so that it is clear whether we must hand over ownership
     explicitly.  */
  unsigned int r = atomic_load_relaxed (&rwlock->__data.__readers);
  unsigned int rnew;
  for (;;)
    {
      rnew = r - (1 << PTHREAD_RWLOCK_READER_SHIFT);
      /* If we are the last reader, we also need to unblock any readers
         that are waiting for a writer to go first (PTHREAD_RWLOCK_RWAITING)
         so that they can register while the writer is active.  */
      if ((rnew >> PTHREAD_RWLOCK_READER_SHIFT) == 0)
        {
          /* Hand over to a waiting primary writer by switching to a write
             phase (state #4 -> #7 territory); also clear RWAITING either
             way because no readers remain to wake the waiters later.  */
          if ((rnew & PTHREAD_RWLOCK_WRLOCKED) != 0)
            rnew |= PTHREAD_RWLOCK_WRPHASE;
          rnew &= ~(unsigned int) PTHREAD_RWLOCK_RWAITING;
        }
      /* We need release MO here for three reasons.  First, so that we
         synchronize with subsequent writers.  Second, we might have been the
         first reader and set __wrphase_futex to 0, so we need to synchronize
         with the last reader that will set it to 1 (note that we will always
         change __readers before the last reader, or we are the last reader).
         Third, a writer that takes part in explicit hand-over needs to see
         the first reader's store to __wrphase_futex (or a later value) if
         the writer observes that a write phase has been started.  */
      if (atomic_compare_exchange_weak_release (&rwlock->__data.__readers,
                                                &r, rnew))
        break;
      /* TODO Back-off.  */
    }
  if ((rnew & PTHREAD_RWLOCK_WRPHASE) != 0)
    {
      /* We need to do explicit hand-over.  We need the acquire MO fence so
         that our modification of __wrphase_futex happens after a store by
         another reader that started a read phase.  Relaxed MO is sufficient
         for the modification of __wrphase_futex because it is just used
         to delay acquisition by a writer until all threads are unblocked
         irrespective of whether they are looking at __readers or
         __wrphase_futex; any other synchronizes-with relations that are
         necessary are established through __readers.  */
      atomic_thread_fence_acquire ();
      if ((atomic_exchange_relaxed (&rwlock->__data.__wrphase_futex, 1)
           & PTHREAD_RWLOCK_FUTEX_USED) != 0)
        futex_wake (&rwlock->__data.__wrphase_futex, INT_MAX, private);
    }
  /* Also wake up waiting readers if we did reset the RWAITING flag.  */
  if ((r & PTHREAD_RWLOCK_RWAITING) != (rnew & PTHREAD_RWLOCK_RWAITING))
    futex_wake (&rwlock->__data.__readers, INT_MAX, private);
}
|---|
| 278 |  | 
|---|
| 279 |  | 
|---|
/* Acquire a read lock on RWLOCK, blocking if necessary.  If ABSTIME is
   non-NULL, wait no later than the absolute time ABSTIME measured against
   CLOCKID.  Returns 0 on success, EINVAL for an invalid clock/timeout,
   EDEADLK if the caller already holds the write lock, EAGAIN on reader
   overflow, or ETIMEDOUT.  */
static __always_inline int
__pthread_rwlock_rdlock_full (pthread_rwlock_t *rwlock,
                              clockid_t clockid,
                              const struct timespec *abstime)
{
  unsigned int r;

  /* Make sure any passed in clockid and timeout value are valid.  Note that
     the previous implementation assumed that this check *must* not be
     performed if there would in fact be no blocking; however, POSIX only
     requires that "the validity of the abstime parameter need not be checked
     if the lock can be immediately acquired" (i.e., we need not but may check
     it).  */
  if (abstime && __glibc_unlikely (!futex_abstimed_supported_clockid (clockid)
      || ! valid_nanoseconds (abstime->tv_nsec)))
    return EINVAL;

  /* Make sure we are not holding the rwlock as a writer.  This is a deadlock
     situation we recognize and report.  */
  if (__glibc_unlikely (atomic_load_relaxed (&rwlock->__data.__cur_writer)
                        == THREAD_GETMEM (THREAD_SELF, tid)))
    return EDEADLK;

  /* If we prefer writers, recursive rdlock is disallowed, we are in a read
     phase, and there are other readers present, we try to wait without
     extending the read phase.  We will be unblocked by either one of the
     other active readers, or if the writer gives up WRLOCKED (e.g., on
     timeout).
     If there are no other readers, we simply race with any existing primary
     writer; it would have been a race anyway, and changing the odds slightly
     will likely not make a big difference.  */
  if (rwlock->__data.__flags == PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP)
    {
      r = atomic_load_relaxed (&rwlock->__data.__readers);
      while ((r & PTHREAD_RWLOCK_WRPHASE) == 0
             && (r & PTHREAD_RWLOCK_WRLOCKED) != 0
             && (r >> PTHREAD_RWLOCK_READER_SHIFT) > 0)
        {
          /* TODO Spin first.  */
          /* Try setting the flag signaling that we are waiting without having
             incremented the number of readers.  Relaxed MO is fine because
             this is just about waiting for a state change in __readers.  */
          if (atomic_compare_exchange_weak_relaxed
              (&rwlock->__data.__readers, &r, r | PTHREAD_RWLOCK_RWAITING))
            {
              /* Wait for as long as the flag is set.  An ABA situation is
                 harmless because the flag is just about the state of
                 __readers, and all threads set the flag under the same
                 conditions.  */
              while (((r = atomic_load_relaxed (&rwlock->__data.__readers))
                      & PTHREAD_RWLOCK_RWAITING) != 0)
                {
                  int private = __pthread_rwlock_get_private (rwlock);
                  int err = futex_abstimed_wait (&rwlock->__data.__readers,
                                                 r, clockid, abstime, private);
                  /* We ignore EAGAIN and EINTR.  On time-outs, we can just
                     return because we don't need to clean up anything.  */
                  if (err == ETIMEDOUT)
                    return err;
                }
              /* It makes sense to not break out of the outer loop here
                 because we might be in the same situation again.  */
            }
          else
            {
              /* TODO Back-off.  */
            }
        }
    }
  /* Register as a reader, using an add-and-fetch so that R can be used as
     expected value for future operations.  Acquire MO so we synchronize with
     prior writers as well as the last reader of the previous read phase (see
     below).  */
  r = (atomic_fetch_add_acquire (&rwlock->__data.__readers,
                                 (1 << PTHREAD_RWLOCK_READER_SHIFT))
       + (1 << PTHREAD_RWLOCK_READER_SHIFT));

  /* Check whether there is an overflow in the number of readers.  We assume
     that the total number of threads is less than half the maximum number
     of readers that we have bits for in __readers (i.e., with 32-bit int and
     PTHREAD_RWLOCK_READER_SHIFT of 3, we assume there are less than
     1 << (32-3-1) concurrent threads).
     If there is an overflow, we use a CAS to try to decrement the number of
     readers if there still is an overflow situation.  If so, we return
     EAGAIN; if not, we are not a thread causing an overflow situation, and so
     we just continue.  Using a fetch-add instead of the CAS isn't possible
     because other readers might release the lock concurrently, which could
     make us the last reader and thus responsible for handing ownership over
     to writers (which requires a CAS too to make the decrement and ownership
     transfer indivisible).  */
  while (__glibc_unlikely (r >= PTHREAD_RWLOCK_READER_OVERFLOW))
    {
      /* Relaxed MO is okay because we just want to undo our registration and
         cannot have changed the rwlock state substantially if the CAS
         succeeds.  */
      if (atomic_compare_exchange_weak_relaxed
          (&rwlock->__data.__readers,
           &r, r - (1 << PTHREAD_RWLOCK_READER_SHIFT)))
        return EAGAIN;
    }

  /* We have registered as a reader, so if we are in a read phase, we have
     acquired a read lock.  This is also the reader--reader fast-path.
     Even if there is a primary writer, we just return.  If writers are to
     be preferred and we are the only active reader, we could try to enter a
     write phase to let the writer proceed.  This would be okay because we
     cannot have acquired the lock previously as a reader (which could result
     in deadlock if we would wait for the primary writer to run).  However,
     this seems to be a corner case and handling it specially would not be
     worth the complexity.  */
  if (__glibc_likely ((r & PTHREAD_RWLOCK_WRPHASE) == 0))
    return 0;
  /* Otherwise, if we were in a write phase (states #6 or #8), we must wait
     for explicit hand-over of the read phase; the only exception is if we
     can start a read phase if there is no primary writer currently.  */
  while ((r & PTHREAD_RWLOCK_WRPHASE) != 0
         && (r & PTHREAD_RWLOCK_WRLOCKED) == 0)
    {
      /* Try to enter a read phase: If the CAS below succeeds, we have
         ownership; if it fails, we will simply retry and reassess the
         situation.
         Acquire MO so we synchronize with prior writers.  */
      if (atomic_compare_exchange_weak_acquire (&rwlock->__data.__readers, &r,
                                                r ^ PTHREAD_RWLOCK_WRPHASE))
        {
          /* We started the read phase, so we are also responsible for
             updating the write-phase futex.  Relaxed MO is sufficient.
             We have to do the same steps as a writer would when handing
             over the read phase to us because other readers cannot
             distinguish between us and the writer; this includes
             explicit hand-over and potentially having to wake other readers
             (but we can pretend to do the setting and unsetting of WRLOCKED
             atomically, and thus can skip this step).  */
          if ((atomic_exchange_relaxed (&rwlock->__data.__wrphase_futex, 0)
               & PTHREAD_RWLOCK_FUTEX_USED) != 0)
            {
              int private = __pthread_rwlock_get_private (rwlock);
              futex_wake (&rwlock->__data.__wrphase_futex, INT_MAX, private);
            }
          return 0;
        }
      else
        {
          /* TODO Back off before retrying.  Also see above.  */
        }
    }

  /* We were in a write phase but did not install the read phase.  We cannot
     distinguish between a writer and another reader starting the read phase,
     so we must wait for explicit hand-over via __wrphase_futex.
     However, __wrphase_futex might not have been set to 1 yet (either
     because explicit hand-over to the writer is still ongoing, or because
     the writer has started the write phase but has not yet updated
     __wrphase_futex).  The least recent value of __wrphase_futex we can
     read from here is the modification of the last read phase (because
     we synchronize with the last reader in this read phase through
     __readers; see the use of acquire MO on the fetch_add above).
     Therefore, if we observe a value of 0 for __wrphase_futex, we need
     to subsequently check that __readers now indicates a read phase; we
     need to use acquire MO for this so that if we observe a read phase,
     we will also see the modification of __wrphase_futex by the previous
     writer.  We then need to load __wrphase_futex again and continue to
     wait if it is not 0, so that we do not skip explicit hand-over.
     Relaxed MO is sufficient for the load from __wrphase_futex because
     we just use it as an indicator for when we can proceed; we use
     __readers and the acquire MO accesses to it to eventually read from
     the proper stores to __wrphase_futex.  */
  unsigned int wpf;
  bool ready = false;
  for (;;)
    {
      /* The OR with FUTEX_USED makes the test match both "1" and
         "1 | FUTEX_USED": wait while a write phase is still signaled.  */
      while (((wpf = atomic_load_relaxed (&rwlock->__data.__wrphase_futex))
              | PTHREAD_RWLOCK_FUTEX_USED) == (1 | PTHREAD_RWLOCK_FUTEX_USED))
        {
          int private = __pthread_rwlock_get_private (rwlock);
          /* Mark the futex word as used before blocking so that the waking
             thread knows a futex_wake is required (see file comment).  */
          if (((wpf & PTHREAD_RWLOCK_FUTEX_USED) == 0)
              && (!atomic_compare_exchange_weak_relaxed
                  (&rwlock->__data.__wrphase_futex,
                   &wpf, wpf | PTHREAD_RWLOCK_FUTEX_USED)))
            continue;
          int err = futex_abstimed_wait (&rwlock->__data.__wrphase_futex,
                                         1 | PTHREAD_RWLOCK_FUTEX_USED,
                                         clockid, abstime, private);
          if (err == ETIMEDOUT)
            {
              /* If we timed out, we need to unregister.  If no read phase
                 has been installed while we waited, we can just decrement
                 the number of readers.  Otherwise, we just acquire the
                 lock, which is allowed because we give no precise timing
                 guarantees, and because the timeout is only required to
                 be in effect if we would have had to wait for other
                 threads (e.g., if futex_wait would time-out immediately
                 because the given absolute time is in the past).  */
              r = atomic_load_relaxed (&rwlock->__data.__readers);
              while ((r & PTHREAD_RWLOCK_WRPHASE) != 0)
                {
                  /* We don't need to make anything else visible to
                     others besides unregistering, so relaxed MO is
                     sufficient.  */
                  if (atomic_compare_exchange_weak_relaxed
                      (&rwlock->__data.__readers, &r,
                       r - (1 << PTHREAD_RWLOCK_READER_SHIFT)))
                    return ETIMEDOUT;
                  /* TODO Back-off.  */
                }
              /* Use the acquire MO fence to mirror the steps taken in the
                 non-timeout case.  Note that the read can happen both
                 in the atomic_load above as well as in the failure case
                 of the CAS operation.  */
              atomic_thread_fence_acquire ();
              /* We still need to wait for explicit hand-over, but we must
                 not use futex_wait anymore because we would just time out
                 in this case and thus make the spin-waiting we need
                 unnecessarily expensive.  */
              while ((atomic_load_relaxed (&rwlock->__data.__wrphase_futex)
                      | PTHREAD_RWLOCK_FUTEX_USED)
                     == (1 | PTHREAD_RWLOCK_FUTEX_USED))
                {
                  /* TODO Back-off?  */
                }
              ready = true;
              break;
            }
          /* If we got interrupted (EINTR) or the futex word does not have the
             expected value (EAGAIN), retry.  */
        }
      if (ready)
        /* See below.  */
        break;
      /* We need acquire MO here so that we synchronize with the lock
         release of the writer, and so that we observe a recent value of
         __wrphase_futex (see below).  */
      if ((atomic_load_acquire (&rwlock->__data.__readers)
           & PTHREAD_RWLOCK_WRPHASE) == 0)
        /* We are in a read phase now, so the least recent modification of
           __wrphase_futex we can read from is the store by the writer
           with value 1.  Thus, only now we can assume that if we observe
           a value of 0, explicit hand-over is finished.  Retry the loop
           above one more time.  */
        ready = true;
    }

  return 0;
}
|---|
| 524 |  | 
|---|
| 525 |  | 
|---|
/* Release the rwlock held in write mode.  Clears the writer TID, then either
   hands WRLOCKED over to another waiting writer (if writers are preferred)
   or releases WRLOCKED through __readers, switching to a read phase if
   readers are waiting.  Finally wakes one blocked writer if any was waiting
   on __writers_futex.  */
static __always_inline void
__pthread_rwlock_wrunlock (pthread_rwlock_t *rwlock)
{
  int private = __pthread_rwlock_get_private (rwlock);

  /* We no longer hold the lock, so reset the deadlock-detection TID first.  */
  atomic_store_relaxed (&rwlock->__data.__cur_writer, 0);
  /* Disable waiting by writers.  We will wake up after we decided how to
     proceed.  */
  bool wake_writers
    = ((atomic_exchange_relaxed (&rwlock->__data.__writers_futex, 0)
	& PTHREAD_RWLOCK_FUTEX_USED) != 0);

  if (rwlock->__data.__flags != PTHREAD_RWLOCK_PREFER_READER_NP)
    {
      /* First, try to hand over to another writer.  */
      unsigned int w = atomic_load_relaxed (&rwlock->__data.__writers);
      while (w != 0)
	{
	  /* Release MO so that another writer that gets WRLOCKED from us will
	     synchronize with us and thus can take over our view of
	     __readers (including, for example, whether we are in a write
	     phase or not).  */
	  if (atomic_compare_exchange_weak_release
	      (&rwlock->__data.__writers, &w, w | PTHREAD_RWLOCK_WRHANDOVER))
	    /* Another writer will take over.  */
	    goto done;
	  /* TODO Back-off.  */
	}
    }

  /* We have done everything we needed to do to prefer writers, so now we
     either hand over explicitly to readers if there are any, or we simply
     stay in a write phase.  See pthread_rwlock_rdunlock for more details.  */
  unsigned int r = atomic_load_relaxed (&rwlock->__data.__readers);
  /* Release MO so that subsequent readers or writers synchronize with us.
     The new value always clears WRLOCKED; it additionally clears WRPHASE
     (i.e., starts a read phase) iff there is at least one waiting reader.  */
  while (!atomic_compare_exchange_weak_release
	 (&rwlock->__data.__readers, &r,
	  ((r ^ PTHREAD_RWLOCK_WRLOCKED)
	   ^ ((r >> PTHREAD_RWLOCK_READER_SHIFT) == 0 ? 0
	      : PTHREAD_RWLOCK_WRPHASE))))
    {
      /* TODO Back-off.  */
    }
  if ((r >> PTHREAD_RWLOCK_READER_SHIFT) != 0)
    {
      /* We must hand over explicitly through __wrphase_futex.  Relaxed MO is
	 sufficient because it is just used to delay acquisition by a writer;
	 any other synchronizes-with relations that are necessary are
	 established through __readers.  */
      if ((atomic_exchange_relaxed (&rwlock->__data.__wrphase_futex, 0)
	   & PTHREAD_RWLOCK_FUTEX_USED) != 0)
	futex_wake (&rwlock->__data.__wrphase_futex, INT_MAX, private);
    }

 done:
  /* We released WRLOCKED in some way, so wake a writer.  */
  if (wake_writers)
    futex_wake (&rwlock->__data.__writers_futex, 1, private);
}
|---|
| 585 |  | 
|---|
| 586 |  | 
|---|
/* Acquire the rwlock for writing.  Blocks until the lock is acquired or, if
   ABSTIME is not NULL, until that absolute time (measured against CLOCKID)
   has passed.  Returns 0 on success, EINVAL if the clockid or timeout is
   invalid, EDEADLK if the calling thread already holds the lock as a writer,
   or ETIMEDOUT.  */
static __always_inline int
__pthread_rwlock_wrlock_full (pthread_rwlock_t *rwlock,
			      clockid_t clockid,
			      const struct timespec *abstime)
{
  /* Make sure any passed in clockid and timeout value are valid.  Note that
     the previous implementation assumed that this check *must* not be
     performed if there would in fact be no blocking; however, POSIX only
     requires that "the validity of the abstime parameter need not be checked
     if the lock can be immediately acquired" (i.e., we need not but may check
     it).  */
  if (abstime && __glibc_unlikely (!futex_abstimed_supported_clockid (clockid)
				   || ! valid_nanoseconds (abstime->tv_nsec)))
    return EINVAL;

  /* Make sure we are not holding the rwlock as a writer.  This is a deadlock
     situation we recognize and report.  */
  if (__glibc_unlikely (atomic_load_relaxed (&rwlock->__data.__cur_writer)
			== THREAD_GETMEM (THREAD_SELF, tid)))
    return EDEADLK;

  /* First we try to acquire the role of primary writer by setting WRLOCKED;
     if it was set before, there already is a primary writer.  Acquire MO so
     that we synchronize with previous primary writers.

     We do not try to change to a write phase right away using a fetch_or
     because we would have to reset it again and wake readers if there are
     readers present (some readers could try to acquire the lock more than
     once, so setting a write phase in the middle of this could cause
     deadlock).  Changing to a write phase eagerly would only speed up the
     transition from a read phase to a write phase in the uncontended case,
     but it would slow down the contended case if readers are preferred (which
     is the default).
     We could try to CAS from a state with no readers to a write phase, but
     this could be less scalable if readers arrive and leave frequently.  */
  bool may_share_futex_used_flag = false;
  unsigned int r = atomic_fetch_or_acquire (&rwlock->__data.__readers,
					    PTHREAD_RWLOCK_WRLOCKED);
  if (__glibc_unlikely ((r & PTHREAD_RWLOCK_WRLOCKED) != 0))
    {
      /* There is another primary writer.  */
      bool prefer_writer
	= (rwlock->__data.__flags != PTHREAD_RWLOCK_PREFER_READER_NP);
      if (prefer_writer)
	{
	  /* We register as a waiting writer, so that we can make use of
	     writer--writer hand-over.  Relaxed MO is fine because we just
	     want to register.  We assume that the maximum number of threads
	     is less than the capacity in __writers.  */
	  atomic_fetch_add_relaxed (&rwlock->__data.__writers, 1);
	}
      for (;;)
	{
	  /* TODO Spin until WRLOCKED is 0 before trying the CAS below.
	     But pay attention to not delay trying writer--writer hand-over
	     for too long (which we must try eventually anyway).  */
	  if ((r & PTHREAD_RWLOCK_WRLOCKED) == 0)
	    {
	      /* Try to become the primary writer or retry.  Acquire MO as in
		 the fetch_or above.  */
	      if (atomic_compare_exchange_weak_acquire
		  (&rwlock->__data.__readers, &r, r | PTHREAD_RWLOCK_WRLOCKED))
		{
		  if (prefer_writer)
		    {
		      /* Unregister as a waiting writer.  Note that because we
			 acquired WRLOCKED, WRHANDOVER will not be set.
			 Acquire MO on the CAS above ensures that
			 unregistering happens after the previous writer;
			 this sorts the accesses to __writers by all
			 primary writers in a useful way (e.g., any other
			 primary writer acquiring after us or getting it from
			 us through WRHANDOVER will see both our changes to
			 __writers).
			 ??? Perhaps this is not strictly necessary for
			 reasons we do not yet know of.  */
		      atomic_fetch_add_relaxed (&rwlock->__data.__writers, -1);
		    }
		  break;
		}
	      /* Retry if the CAS fails (r will have been updated).  */
	      continue;
	    }
	  /* If writer--writer hand-over is available, try to become the
	     primary writer this way by grabbing the WRHANDOVER token.  If we
	     succeed, we own WRLOCKED.  */
	  if (prefer_writer)
	    {
	      unsigned int w = atomic_load_relaxed (&rwlock->__data.__writers);
	      if ((w & PTHREAD_RWLOCK_WRHANDOVER) != 0)
		{
		  /* Acquire MO is required here so that we synchronize with
		     the writer that handed over WRLOCKED.  We also need this
		     for the reload of __readers below because our view of
		     __readers must be at least as recent as the view of the
		     writer that handed over WRLOCKED; we must avoid an ABA
		     through WRHANDOVER, which could, for example, lead to us
		     assuming we are still in a write phase when in fact we
		     are not.  */
		  if (atomic_compare_exchange_weak_acquire
		      (&rwlock->__data.__writers,
		       &w, (w - PTHREAD_RWLOCK_WRHANDOVER - 1)))
		    {
		      /* Reload so our view is consistent with the view of
			 the previous owner of WRLOCKED.  See above.  */
		      r = atomic_load_relaxed (&rwlock->__data.__readers);
		      break;
		    }
		  /* We do not need to reload __readers here.  We should try
		     to perform writer--writer hand-over if possible; if it
		     is not possible anymore, we will reload __readers
		     elsewhere in this loop.  */
		  continue;
		}
	    }
	  /* We did not acquire WRLOCKED nor were able to use writer--writer
	     hand-over, so we block on __writers_futex.  */
	  int private = __pthread_rwlock_get_private (rwlock);
	  unsigned int wf
	    = atomic_load_relaxed (&rwlock->__data.__writers_futex);
	  if (((wf & ~(unsigned int) PTHREAD_RWLOCK_FUTEX_USED) != 1)
	      || ((wf != (1 | PTHREAD_RWLOCK_FUTEX_USED))
		  && (!atomic_compare_exchange_weak_relaxed
		      (&rwlock->__data.__writers_futex, &wf,
		       1 | PTHREAD_RWLOCK_FUTEX_USED))))
	    {
	      /* If we cannot block on __writers_futex because there is no
		 primary writer, or we cannot set PTHREAD_RWLOCK_FUTEX_USED,
		 we retry.  We must reload __readers here in case we cannot
		 block on __writers_futex so that we can become the primary
		 writer and are not stuck in a loop that just continuously
		 fails to block on __writers_futex.  */
	      r = atomic_load_relaxed (&rwlock->__data.__readers);
	      continue;
	    }
	  /* We set the flag that signals that the futex is used, or we could
	     have set it if we had been faster than other waiters.  As a
	     result, we may share the flag with an unknown number of other
	     writers.  Therefore, we must keep this flag set when we acquire
	     the lock.  We do not need to do this when we do not reach this
	     point here because then we are not part of the group that may
	     share the flag, and another writer will wake one of the writers
	     in this group.  */
	  may_share_futex_used_flag = true;
	  int err = futex_abstimed_wait (&rwlock->__data.__writers_futex,
					 1 | PTHREAD_RWLOCK_FUTEX_USED,
					 clockid, abstime, private);
	  if (err == ETIMEDOUT)
	    {
	      if (prefer_writer)
		{
		  /* We need to unregister as a waiting writer.  If we are the
		     last writer and writer--writer hand-over is available,
		     we must make use of it because nobody else will reset
		     WRLOCKED otherwise.  (If we use it, we simply pretend
		     that this happened before the timeout; see
		     pthread_rwlock_rdlock_full for the full reasoning.)
		     Also see the similar code above.  */
		  unsigned int w
		    = atomic_load_relaxed (&rwlock->__data.__writers);
		  while (!atomic_compare_exchange_weak_acquire
			 (&rwlock->__data.__writers, &w,
			  (w == PTHREAD_RWLOCK_WRHANDOVER + 1 ? 0 : w - 1)))
		    {
		      /* TODO Back-off.  */
		    }
		  if (w == PTHREAD_RWLOCK_WRHANDOVER + 1)
		    {
		      /* We must continue as primary writer.  See above.  */
		      r = atomic_load_relaxed (&rwlock->__data.__readers);
		      break;
		    }
		}
	      /* We cleaned up and cannot have stolen another waiting writer's
		 futex wake-up, so just return.  */
	      return ETIMEDOUT;
	    }
	  /* If we got interrupted (EINTR) or the futex word does not have the
	     expected value (EAGAIN), retry after reloading __readers.  */
	  r = atomic_load_relaxed (&rwlock->__data.__readers);
	}
      /* Our snapshot of __readers is up-to-date at this point because we
	 either set WRLOCKED using a CAS (and update r accordingly below,
	 which was used as expected value for the CAS) or got WRLOCKED from
	 another writer whose snapshot of __readers we inherit.  */
      r |= PTHREAD_RWLOCK_WRLOCKED;
    }

  /* We are the primary writer; enable blocking on __writers_futex.  Relaxed
     MO is sufficient for futex words; acquire MO on the previous
     modifications of __readers ensures that this store happens after the
     store of value 0 by the previous primary writer.  */
  atomic_store_relaxed (&rwlock->__data.__writers_futex,
			1 | (may_share_futex_used_flag
			     ? PTHREAD_RWLOCK_FUTEX_USED : 0));

  /* If we are in a write phase, we have acquired the lock.  */
  if ((r & PTHREAD_RWLOCK_WRPHASE) != 0)
    goto done;

  /* If we are in a read phase and there are no readers, try to start a write
     phase.  */
  while ((r & PTHREAD_RWLOCK_WRPHASE) == 0
	 && (r >> PTHREAD_RWLOCK_READER_SHIFT) == 0)
    {
      /* Acquire MO so that we synchronize with prior writers and do
	 not interfere with their updates to __writers_futex, as well
	 as regarding prior readers and their updates to __wrphase_futex,
	 respectively.  */
      if (atomic_compare_exchange_weak_acquire (&rwlock->__data.__readers,
						&r, r | PTHREAD_RWLOCK_WRPHASE))
	{
	  /* We have started a write phase, so need to enable readers to wait.
	     See the similar case in __pthread_rwlock_rdlock_full.  Unlike in
	     that similar case, we are the (only) primary writer and so do
	     not need to wake another writer.  */
	  atomic_store_relaxed (&rwlock->__data.__wrphase_futex, 1);

	  goto done;
	}
      /* TODO Back-off.  */
    }

  /* We became the primary writer in a read phase and there were readers when
     we did (because of the previous loop).  Thus, we have to wait for
     explicit hand-over from one of these readers.
     We basically do the same steps as for the similar case in
     __pthread_rwlock_rdlock_full, except that we additionally might try
     to directly hand over to another writer and need to wake up
     other writers or waiting readers (i.e., PTHREAD_RWLOCK_RWAITING).  */
  unsigned int wpf;
  bool ready = false;
  for (;;)
    {
      while (((wpf = atomic_load_relaxed (&rwlock->__data.__wrphase_futex))
	      | PTHREAD_RWLOCK_FUTEX_USED) == PTHREAD_RWLOCK_FUTEX_USED)
	{
	  int private = __pthread_rwlock_get_private (rwlock);
	  if ((wpf & PTHREAD_RWLOCK_FUTEX_USED) == 0
	      && (!atomic_compare_exchange_weak_relaxed
		  (&rwlock->__data.__wrphase_futex, &wpf,
		   PTHREAD_RWLOCK_FUTEX_USED)))
	    continue;
	  int err = futex_abstimed_wait (&rwlock->__data.__wrphase_futex,
					 PTHREAD_RWLOCK_FUTEX_USED,
					 clockid, abstime, private);
	  if (err == ETIMEDOUT)
	    {
	      if (rwlock->__data.__flags != PTHREAD_RWLOCK_PREFER_READER_NP)
		{
		  /* We try writer--writer hand-over.  */
		  unsigned int w
		    = atomic_load_relaxed (&rwlock->__data.__writers);
		  if (w != 0)
		    {
		      /* We are about to hand over WRLOCKED, so we must
			 release __writers_futex too; otherwise, we'd have
			 a pending store, which could at least prevent
			 other threads from waiting using the futex
			 because it could interleave with the stores
			 by subsequent writers.  In turn, this means that
			 we have to clean up when we do not hand over
			 WRLOCKED.
			 Release MO so that another writer that gets
			 WRLOCKED from us can take over our view of
			 __readers.  */
		      unsigned int wf
			= atomic_exchange_relaxed (&rwlock->__data.__writers_futex, 0);
		      while (w != 0)
			{
			  if (atomic_compare_exchange_weak_release
			      (&rwlock->__data.__writers, &w,
			       w | PTHREAD_RWLOCK_WRHANDOVER))
			    {
			      /* Wake other writers.  */
			      if ((wf & PTHREAD_RWLOCK_FUTEX_USED) != 0)
				futex_wake (&rwlock->__data.__writers_futex,
					    1, private);
			      return ETIMEDOUT;
			    }
			  /* TODO Back-off.  */
			}
		      /* We still own WRLOCKED and someone else might set
			 a write phase concurrently, so enable waiting
			 again.  Make sure we don't lose the flag that
			 signals whether there are threads waiting on
			 this futex.  */
		      atomic_store_relaxed (&rwlock->__data.__writers_futex, wf);
		    }
		}
	      /* If we timed out and we are not in a write phase, we can
		 just stop being a primary writer.  Otherwise, we just
		 acquire the lock.  */
	      r = atomic_load_relaxed (&rwlock->__data.__readers);
	      if ((r & PTHREAD_RWLOCK_WRPHASE) == 0)
		{
		  /* We are about to release WRLOCKED, so we must release
		     __writers_futex too; see the handling of
		     writer--writer hand-over above.  */
		  unsigned int wf
		    = atomic_exchange_relaxed (&rwlock->__data.__writers_futex, 0);
		  while ((r & PTHREAD_RWLOCK_WRPHASE) == 0)
		    {
		      /* While we don't need to make anything from a
			 caller's critical section visible to other
			 threads, we need to ensure that our changes to
			 __writers_futex are properly ordered.
			 Therefore, use release MO to synchronize with
			 subsequent primary writers.  Also wake up any
			 waiting readers as they are waiting because of
			 us.  */
		      if (atomic_compare_exchange_weak_release
			  (&rwlock->__data.__readers, &r,
			   (r ^ PTHREAD_RWLOCK_WRLOCKED)
			   & ~(unsigned int) PTHREAD_RWLOCK_RWAITING))
			{
			  /* Wake other writers.  */
			  if ((wf & PTHREAD_RWLOCK_FUTEX_USED) != 0)
			    futex_wake (&rwlock->__data.__writers_futex,
					1, private);
			  /* Wake waiting readers.  */
			  if ((r & PTHREAD_RWLOCK_RWAITING) != 0)
			    futex_wake (&rwlock->__data.__readers,
					INT_MAX, private);
			  return ETIMEDOUT;
			}
		    }
		  /* We still own WRLOCKED and someone else might set a
		     write phase concurrently, so enable waiting again.
		     Make sure we don't lose the flag that signals
		     whether there are threads waiting on this futex.  */
		  atomic_store_relaxed (&rwlock->__data.__writers_futex, wf);
		}
	      /* Use the acquire MO fence to mirror the steps taken in the
		 non-timeout case.  Note that the read can happen both
		 in the atomic_load above as well as in the failure case
		 of the CAS operation.  */
	      atomic_thread_fence_acquire ();
	      /* We still need to wait for explicit hand-over, but we must
		 not use futex_wait anymore.  */
	      while ((atomic_load_relaxed (&rwlock->__data.__wrphase_futex)
		      | PTHREAD_RWLOCK_FUTEX_USED)
		     == PTHREAD_RWLOCK_FUTEX_USED)
		{
		  /* TODO Back-off.  */
		}
	      ready = true;
	      break;
	    }
	  /* If we got interrupted (EINTR) or the futex word does not have
	     the expected value (EAGAIN), retry.  */
	}
      /* See pthread_rwlock_rdlock_full.  */
      if (ready)
	break;
      if ((atomic_load_acquire (&rwlock->__data.__readers)
	   & PTHREAD_RWLOCK_WRPHASE) != 0)
	ready = true;
    }

 done:
  atomic_store_relaxed (&rwlock->__data.__cur_writer,
			THREAD_GETMEM (THREAD_SELF, tid));
  return 0;
}
|---|
| 952 |  | 
|---|