/* Copyright (C) 2003-2020 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2003.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <endian.h>
#include <errno.h>
#include <sysdep.h>
#include <futex-internal.h>
#include <pthread.h>
#include <pthreadP.h>
#include <sys/time.h>
#include <atomic.h>
#include <stdint.h>
#include <stdbool.h>

#include <shlib-compat.h>
#include <stap-probe.h>
#include <time.h>

#include "pthread_cond_common.c"


struct _condvar_cleanup_buffer
{
  uint64_t wseq;
  pthread_cond_t *cond;
  pthread_mutex_t *mutex;
  int private;
};


/* Decrease the waiter reference count.  */
static void
__condvar_confirm_wakeup (pthread_cond_t *cond, int private)
{
  /* If destruction is pending (i.e., the wake-request flag is nonzero) and we
     are the last waiter (prior value of __wrefs was 1 << 3), then wake any
     threads waiting in pthread_cond_destroy.  Release MO to synchronize with
     these threads.  Don't bother clearing the wake-up request flag.  */
  if ((atomic_fetch_add_release (&cond->__data.__wrefs, -8) >> 2) == 3)
    futex_wake (&cond->__data.__wrefs, INT_MAX, private);
}
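
/* Worked example of the check above, for illustration only (assumption: the
   __wrefs layout documented in the big comment further below, i.e., the
   reference count lives in bits 3 and up, bit 2 is the destruction
   wake-request flag):

     prior = atomic_fetch_add_release (&cond->__data.__wrefs, -8);
     prior >> 2 == 3   is equivalent to   (prior >> 3) == 1 && (prior & 4) != 0

   That is, futex_wake is issued only if we were the last waiter (the
   reference count dropped from 1 to 0) and pthread_cond_destroy has asked to
   be woken.  The real check is just the single comparison above.  */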


/* Cancel waiting after having registered as a waiter previously.  SEQ is our
   position and G is our group index.
   The goal of cancellation is to make our group smaller if that is still
   possible.  If we are in a closed group, this is not possible anymore; in
   this case, we need to send a replacement signal for the one we effectively
   consumed because the signal should have gotten consumed by another waiter
   instead; we must not both cancel waiting and consume a signal.

   Must not be called while still holding a reference on the group.

   If we effectively consumed a signal, we send a replacement signal before
   returning (see below).

   On some kind of timeouts, we may be able to pretend that a signal we
   effectively consumed happened before the timeout (i.e., similarly to first
   spinning on signals before actually checking whether the timeout has
   passed already).  Doing this would allow us to skip sending a replacement
   signal, but this case might happen rarely because the end of the timeout
   must race with someone else sending a signal.  Therefore, we don't bother
   trying to optimize this.  */
static void
__condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g,
                          int private)
{
  bool consumed_signal = false;

  /* No deadlock with group switching is possible here because we do not
     hold a reference on the group.  */
  __condvar_acquire_lock (cond, private);

  uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
  if (g1_start > seq)
    {
      /* Our group is closed, so someone provided enough signals for it.
         Thus, we effectively consumed a signal.  */
      consumed_signal = true;
    }
  else
    {
      if (g1_start + __condvar_get_orig_size (cond) <= seq)
        {
          /* We are in the current G2 and thus cannot have consumed a signal.
             Reduce its effective size or handle overflow.  Remember that in
             G2, the unsigned size is zero or an effectively negative
             value.  */
          if (cond->__data.__g_size[g] + __PTHREAD_COND_MAX_GROUP_SIZE > 0)
            {
              cond->__data.__g_size[g]--;
            }
          else
            {
              /* Cancellations would overflow the maximum group size.  Just
                 wake up everyone spuriously to create a clean state.  This
                 also means we do not consume a signal someone else sent.  */
              __condvar_release_lock (cond, private);
              __pthread_cond_broadcast (cond);
              return;
            }
        }
      else
        {
          /* We are in the current G1.  If the group's size is zero, someone
             put a signal in the group that nobody else but us can consume.  */
          if (cond->__data.__g_size[g] == 0)
            consumed_signal = true;
          else
            {
              /* Otherwise, we decrease the size of the group.  This is
                 equivalent to atomically putting in a signal just for us and
                 consuming it right away.  We do not consume a signal sent
                 by someone else.  We also cannot have consumed a futex
                 wake-up because if we were cancelled or timed out in a futex
                 call, the futex will wake another waiter.  */
              cond->__data.__g_size[g]--;
            }
        }
    }

  __condvar_release_lock (cond, private);

  if (consumed_signal)
    {
      /* We effectively consumed a signal even though we didn't want to.
         Therefore, we need to send a replacement signal.
         If we would want to optimize this, we could do what
         pthread_cond_signal does right in the critical section above.  */
      __pthread_cond_signal (cond);
    }
}

/* Wake up any signalers that might be waiting.  */
static void
__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private)
{
  /* Release MO to synchronize-with the acquire load in
     __condvar_quiesce_and_switch_g1.  */
  if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) == 3)
    {
      /* Clear the wake-up request flag before waking up.  We do not need more
         than relaxed MO and it doesn't matter if we apply this for an aliased
         group because we wake all futex waiters right after clearing the
         flag.  */
      atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1);
      futex_wake (cond->__data.__g_refs + g, INT_MAX, private);
    }
}
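
/* For illustration (not part of the implementation): each waiter holds one
   group reference, counted in increments of 2 because the LSB of __g_refs is
   the wake-request flag set by the group switcher.  So the test above,

     atomic_fetch_add_release (cond->__data.__g_refs + g, -2) == 3

   fires exactly when the previous value was 2 + 1, i.e., we just dropped the
   last reference while a thread in __condvar_quiesce_and_switch_g1 is
   waiting to be woken.  */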

/* Clean-up for cancellation of waiters waiting for normal signals.  We cancel
   our registration as a waiter, confirm we have woken up, and re-acquire the
   mutex.  */
static void
__condvar_cleanup_waiting (void *arg)
{
  struct _condvar_cleanup_buffer *cbuffer =
    (struct _condvar_cleanup_buffer *) arg;
  pthread_cond_t *cond = cbuffer->cond;
  unsigned g = cbuffer->wseq & 1;

  __condvar_dec_grefs (cond, g, cbuffer->private);

  __condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private);
  /* FIXME With the current cancellation implementation, it is possible that
     a thread is cancelled after it has returned from a syscall.  This could
     result in a cancelled waiter consuming a futex wake-up that is then
     causing another waiter in the same group to not wake up.  To work around
     this issue until we have fixed cancellation, just add a futex wake-up
     conservatively.  */
  futex_wake (cond->__data.__g_signals + g, 1, cbuffer->private);

  __condvar_confirm_wakeup (cond, cbuffer->private);

  /* XXX If locking the mutex fails, should we just stop execution?  This
     might be better than silently ignoring the error.  */
  __pthread_mutex_cond_lock (cbuffer->mutex);
}

/* This condvar implementation guarantees that all calls to signal and
   broadcast and all of the three virtually atomic parts of each call to wait
   (i.e., (1) releasing the mutex and blocking, (2) unblocking, and (3) re-
   acquiring the mutex) happen in some total order that is consistent with the
   happens-before relations in the calling program.  However, this order does
   not necessarily result in additional happens-before relations being
   established (which aligns well with spurious wake-ups being allowed).

   All waiters acquire a certain position in a 64b waiter sequence (__wseq).
   This sequence determines which waiters are allowed to consume signals.
   A broadcast is equal to sending as many signals as there are unblocked
   waiters.  When a signal arrives, it samples the current value of __wseq
   with a relaxed-MO load (i.e., the position the next waiter would get).
   (This is sufficient because it is consistent with happens-before; the
   caller can enforce stronger ordering constraints by calling signal while
   holding the mutex.)  Only waiters with a position less than the __wseq
   value observed by the signal are eligible to consume this signal.

   This would be straightforward to implement if waiters would just spin, but
   we need to let them block using futexes.  Futexes give no guarantee of
   waking in FIFO order, so we cannot reliably wake eligible waiters if we
   just use a single futex.  Also, futex words are 32b in size, but we need
   to distinguish more than 1<<32 states because we need to represent the
   order of wake-up (and thus which waiters are eligible to consume signals);
   blocking in a futex is not atomic with a waiter determining its position in
   the waiter sequence, so we need the futex word to reliably notify waiters
   that they should not attempt to block anymore because they have been
   already signaled in the meantime.  While an ABA issue on a 32b value will
   be rare, ignoring it when we are aware of it is not the right thing to do
   either.

   Therefore, we use a 64b counter to represent the waiter sequence (on
   architectures which only support 32b atomics, we use a few bits less).
   To deal with the blocking using futexes, we maintain two groups of waiters:
   * Group G1 consists of waiters that are all eligible to consume signals;
     incoming signals will always signal waiters in this group until all
     waiters in G1 have been signaled.
   * Group G2 consists of waiters that arrive when a G1 is present and still
     contains waiters that have not been signaled.  When all waiters in G1
     are signaled and a new signal arrives, the new signal will convert G2
     into the new G1 and create a new G2 for future waiters.

   We cannot allocate new memory because of process-shared condvars, so we
   have just two slots of groups that change their role between G1 and G2.
   Each has a separate futex word, a number of signals available for
   consumption, a size (number of waiters in the group that have not been
   signaled), and a reference count.

   The group reference count is used to maintain the number of waiters that
   are using the group's futex.  Before a group can change its role, the
   reference count must show that no waiters are using the futex anymore; this
   prevents ABA issues on the futex word.

   To represent which intervals in the waiter sequence the groups cover (and
   thus also which group slot contains G1 or G2), we use a 64b counter to
   designate the start position of G1 (inclusive), and a single bit in the
   waiter sequence counter to represent which group slot currently contains
   G2.  This allows us to switch group roles atomically wrt. waiters obtaining
   a position in the waiter sequence.  The G1 start position allows waiters to
   figure out whether they are in a group that has already been completely
   signaled (i.e., if the current G1 starts at a later position than the
   waiter's position).  Waiters cannot determine whether they are currently
   in G2 or G1 -- but they do not have to because all they are interested in
   is whether there are available signals, and they always start in G2 (whose
   group slot they know because of the bit in the waiter sequence).  Signalers
   will simply fill the right group until it is completely signaled and can
   be closed (they do not switch group roles until they really have to, to
   decrease the likelihood of having to wait for waiters still holding a
   reference on the now-closed G1).

   Signalers maintain the initial size of G1 to be able to determine where
   G2 starts (G2 is always open-ended until it becomes G1).  They track the
   remaining size of a group; when waiters cancel waiting (due to PThreads
   cancellation or timeouts), they will decrease this remaining size as well.

   To implement condvar destruction requirements (i.e., that
   pthread_cond_destroy can be called as soon as all waiters have been
   signaled), waiters increment a reference count before starting to wait and
   decrement it after they stopped waiting but right before they acquire the
   mutex associated with the condvar.

   pthread_cond_t thus consists of the following fields (some bits are used
   for flags; they are not part of the primary value of a field but are
   necessary to make some things atomic, or there was no space for them
   elsewhere in the data structure):

   __wseq: Waiter sequence counter
     * LSB is index of current G2.
     * Waiters fetch-add while having acquired the mutex associated with the
       condvar.  Signalers load it and fetch-xor it concurrently.
   __g1_start: Starting position of G1 (inclusive)
     * LSB is index of current G2.
     * Modified by signalers while having acquired the condvar-internal lock
       and observed concurrently by waiters.
   __g1_orig_size: Initial size of G1
     * The two least-significant bits represent the condvar-internal lock.
     * Only accessed while having acquired the condvar-internal lock.
   __wrefs: Waiter reference counter.
     * Bit 2 is true if waiters should run futex_wake when they remove the
       last reference.  pthread_cond_destroy uses this as futex word.
     * Bit 1 is the clock ID (0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC).
     * Bit 0 is true iff this is a process-shared condvar.
     * Simple reference count used by both waiters and pthread_cond_destroy.
     (If the format of __wrefs is changed, update nptl_lock_constants.pysym
      and the pretty printers.)
   For each of the two groups, we have:
   __g_refs: Futex waiter reference count.
     * LSB is true if waiters should run futex_wake when they remove the
       last reference.
     * Reference count used by waiters concurrently with signalers that have
       acquired the condvar-internal lock.
   __g_signals: The number of signals that can still be consumed.
     * Used as a futex word by waiters.  Used concurrently by waiters and
       signalers.
     * LSB is true iff this group has been completely signaled (i.e., it is
       closed).
   __g_size: Waiters remaining in this group (i.e., which have not been
     signaled yet).
     * Accessed by signalers and waiters that cancel waiting (both do so only
       when having acquired the condvar-internal lock).
     * The size of G2 is always zero because it cannot be determined until
       the group becomes G1.
     * Although this is of unsigned type, we rely on using unsigned overflow
       rules to make this hold effectively negative values too (in
       particular, when waiters in G2 cancel waiting).

   A PTHREAD_COND_INITIALIZER condvar has all fields set to zero, which yields
   a condvar that has G2 starting at position 0 and a G1 that is closed.

   Because waiters do not claim ownership of a group right when obtaining a
   position in __wseq but only reference count the group when using futexes
   to block, it can happen that a group gets closed before a waiter can
   increment the reference count.  Therefore, waiters have to check whether
   their group is already closed using __g1_start.  They also have to perform
   this check when spinning when trying to grab a signal from __g_signals.
   Note that for these checks, using relaxed MO to load __g1_start is
   sufficient because if a waiter can see a sufficiently large value, it could
   have also consumed a signal in the waiter's group.

   Waiters try to grab a signal from __g_signals without holding a reference
   count, which can lead to stealing a signal from a more recent group after
   their own group was already closed.  They cannot always detect whether they
   in fact did so because they do not know when they stole, but they can
   conservatively add a signal back to the group they stole from; if they
   did so unnecessarily, all that happens is a spurious wake-up.  To make this
   even less likely, __g1_start contains the index of the current G2 too,
   which allows waiters to check if there is aliasing on the group slots; if
   there wasn't, they didn't steal from the current G1, which means that the
   G1 they stole from must have been already closed and they do not need to
   fix anything.

   It is essential that the last field in pthread_cond_t is __g_signals[1]:
   The previous condvar used a pointer-sized field in pthread_cond_t, so a
   PTHREAD_COND_INITIALIZER from that condvar implementation might only
   initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes
   in total instead of the 48 we need).  __g_signals[1] is not accessed before
   the first group switch (G2 starts at index 0), which will set its value to
   zero after a harmless fetch-or whose return value is ignored.  This
   effectively completes initialization.


   Limitations:
   * This condvar isn't designed to allow for more than
     __PTHREAD_COND_MAX_GROUP_SIZE * (1 << 31) calls to __pthread_cond_wait.
   * More than __PTHREAD_COND_MAX_GROUP_SIZE concurrent waiters are not
     supported.
   * Beyond what is allowed as errors by POSIX or documented, we can also
     return the following errors:
     * EPERM if MUTEX is a recursive mutex and the caller doesn't own it.
     * EOWNERDEAD or ENOTRECOVERABLE when using robust mutexes.  Unlike
       for other errors, this can happen when we re-acquire the mutex; this
       isn't allowed by POSIX (which requires all errors to virtually happen
       before we release the mutex or change the condvar state), but there's
       nothing we can do really.
     * When using PTHREAD_MUTEX_PP_* mutexes, we can also return all errors
       returned by __pthread_tpp_change_priority.  We will already have
       released the mutex in such cases, so the caller cannot expect to own
       MUTEX.

   Other notes:
   * Instead of the normal mutex unlock / lock functions, we use
     __pthread_mutex_unlock_usercnt(m, 0) / __pthread_mutex_cond_lock(m)
     because those will not change the mutex-internal users count, so that it
     can be detected when a condvar is still associated with a particular
     mutex because there is a waiter blocked on this condvar using this mutex.
*/
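
/* Illustration only (not used by this file): because the design above allows
   spurious wake-ups, a caller must always re-check its own predicate under
   the mutex, in the usual POSIX pattern.  The names m, cond, and ready below
   are placeholders:

     pthread_mutex_lock (&m);
     while (!ready)
       pthread_cond_wait (&cond, &m);
     ... use the state protected by m ...
     pthread_mutex_unlock (&m);

   If the signaling thread sets ready and calls pthread_cond_signal while
   holding m, the waiter either observes ready as true or has already
   registered its position in __wseq before the signal samples it, so the
   wake-up cannot be lost.  */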
static __always_inline int
__pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
                            clockid_t clockid,
                            const struct timespec *abstime)
{
  const int maxspin = 0;
  int err;
  int result = 0;

  LIBC_PROBE (cond_wait, 2, cond, mutex);

  /* clockid will already have been checked by
     __pthread_cond_clockwait or pthread_condattr_setclock, or we
     don't use it if abstime is NULL, so we don't need to check it
     here.  */

  /* Acquire a position (SEQ) in the waiter sequence (WSEQ).  We use an
     atomic operation because signals and broadcasts may update the group
     switch without acquiring the mutex.  We do not need release MO here
     because we do not need to establish any happens-before relation with
     signalers (see __pthread_cond_signal); modification order alone
     establishes a total order of waiters/signals.  We do need acquire MO
     to synchronize with group reinitialization in
     __condvar_quiesce_and_switch_g1.  */
  uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2);
  /* Find our group's index.  We always go into what was G2 when we acquired
     our position.  */
  unsigned int g = wseq & 1;
  uint64_t seq = wseq >> 1;
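  /* For example (illustration only): if the fetch-add above returned
     wseq == 13, then g == 1 and seq == 6, i.e., we are the waiter at
     sequence position 6 and the group slot currently serving as G2 is
     slot 1.  */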

  /* Increase the waiter reference count.  Relaxed MO is sufficient because
     we only need to synchronize when decrementing the reference count.  */
  unsigned int flags = atomic_fetch_add_relaxed (&cond->__data.__wrefs, 8);
  int private = __condvar_get_private (flags);

  /* Now that we are registered as a waiter, we can release the mutex.
     Waiting on the condvar must be atomic with releasing the mutex, so if
     the mutex is used to establish a happens-before relation with any
     signaler, the waiter must be visible to the latter; thus, we release the
     mutex after registering as waiter.
     If releasing the mutex fails, we just cancel our registration as a
     waiter and confirm that we have woken up.  */
  err = __pthread_mutex_unlock_usercnt (mutex, 0);
  if (__glibc_unlikely (err != 0))
    {
      __condvar_cancel_waiting (cond, seq, g, private);
      __condvar_confirm_wakeup (cond, private);
      return err;
    }

  /* Now wait until a signal is available in our group or it is closed.
     Acquire MO so that if we observe a value of zero written after group
     switching in __condvar_quiesce_and_switch_g1, we synchronize with that
     store and will see the prior update of __g1_start done while switching
     groups too.  */
  unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);

  do
    {
      while (1)
        {
          /* Spin-wait first.
             Note that spinning first without checking whether a timeout
             passed might lead to what looks like a spurious wake-up even
             though we should return ETIMEDOUT (e.g., if the caller provides
             an absolute timeout that is clearly in the past).  However,
             (1) spurious wake-ups are allowed, (2) it seems unlikely that a
             user will (ab)use pthread_cond_wait as a check for whether a
             point in time is in the past, and (3) spinning first without
             having to compare against the current time seems to be the right
             choice from a performance perspective for most use cases.  */
          unsigned int spin = maxspin;
          while (signals == 0 && spin > 0)
            {
              /* Check that we are not spinning on a group that's already
                 closed.  */
              if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
                goto done;

              /* TODO Back off.  */

              /* Reload signals.  See above for MO.  */
              signals = atomic_load_acquire (cond->__data.__g_signals + g);
              spin--;
            }

          /* If our group will be closed as indicated by the flag on signals,
             don't bother grabbing a signal.  */
          if (signals & 1)
            goto done;

          /* If there is an available signal, don't block.  */
          if (signals != 0)
            break;

          /* No signals available after spinning, so prepare to block.
             We first acquire a group reference and use acquire MO for that so
             that we synchronize with the dummy read-modify-write in
             __condvar_quiesce_and_switch_g1 if we read from that.  In turn,
             in this case this will make us see the closed flag on __g_signals
             that designates a concurrent attempt to reuse the group's slot.
             We use acquire MO for the __g_signals check to make the
             __g1_start check work (see spinning above).
             Note that the group reference acquisition will not mask the
             release MO when decrementing the reference count because we use
             an atomic read-modify-write operation and thus extend the release
             sequence.  */
          atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2);
          if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0)
              || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)))
            {
              /* Our group is closed.  Wake up any signalers that might be
                 waiting.  */
              __condvar_dec_grefs (cond, g, private);
              goto done;
            }

          // Now block.
          struct _pthread_cleanup_buffer buffer;
          struct _condvar_cleanup_buffer cbuffer;
          cbuffer.wseq = wseq;
          cbuffer.cond = cond;
          cbuffer.mutex = mutex;
          cbuffer.private = private;
          __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer);

          if (abstime == NULL)
            {
              /* Block without a timeout.  */
              err = futex_wait_cancelable (
                  cond->__data.__g_signals + g, 0, private);
            }
          else
            {
              /* Block, but with a timeout.
                 Work around the fact that the kernel rejects negative timeout
                 values despite them being valid.  */
              if (__glibc_unlikely (abstime->tv_sec < 0))
                err = ETIMEDOUT;
              else
                {
                  err = futex_abstimed_wait_cancelable
                    (cond->__data.__g_signals + g, 0, clockid, abstime,
                     private);
                }
            }

          __pthread_cleanup_pop (&buffer, 0);

          if (__glibc_unlikely (err == ETIMEDOUT))
            {
              __condvar_dec_grefs (cond, g, private);
              /* If we timed out, we effectively cancel waiting.  Note that
                 we have decremented __g_refs before cancellation, so that a
                 deadlock between waiting for quiescence of our group in
                 __condvar_quiesce_and_switch_g1 and us trying to acquire
                 the lock during cancellation is not possible.  */
              __condvar_cancel_waiting (cond, seq, g, private);
              result = ETIMEDOUT;
              goto done;
            }
          else
            __condvar_dec_grefs (cond, g, private);

          /* Reload signals.  See above for MO.  */
          signals = atomic_load_acquire (cond->__data.__g_signals + g);
        }

    }
  /* Try to grab a signal.  Use acquire MO so that we see an up-to-date value
     of __g1_start below (see spinning above for a similar case).  In
     particular, if we steal from a more recent group, we will also see a
     more recent __g1_start below.  */
  while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g,
                                                &signals, signals - 2));

  /* We consumed a signal but we could have consumed from a more recent group
     that aliased with ours due to being in the same group slot.  If this
     might be the case our group must be closed as visible through
     __g1_start.  */
  uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
  if (seq < (g1_start >> 1))
    {
      /* We potentially stole a signal from a more recent group but we do not
         know which group we really consumed from.
         We do not care about groups older than current G1 because they are
         closed; we could have stolen from these, but then we just add a
         spurious wake-up for the current groups.
         We will never steal a signal from current G2 that was really intended
         for G2 because G2 never receives signals (until it becomes G1).  We
         could have stolen a signal from G2 that was conservatively added by a
         previous waiter that also thought it stole a signal -- but given that
         that signal was added unnecessarily, it's not a problem if we steal
         it.
         Thus, the remaining case is that we could have stolen from the current
         G1, where "current" means the __g1_start value we observed.  However,
         if the current G1 does not have the same slot index as we do, we did
         not steal from it and do not need to undo that.  This is the reason
         for putting a bit with G2's index into __g1_start as well.  */
      if (((g1_start & 1) ^ 1) == g)
        {
          /* We have to conservatively undo our potential mistake of stealing
             a signal.  We can stop trying to do that when the current G1
             changes because other spinning waiters will notice this too and
             __condvar_quiesce_and_switch_g1 has checked that there are no
             futex waiters anymore before switching G1.
             Relaxed MO is fine for the __g1_start load because we need to
             merely be able to observe this fact and not have to observe
             something else as well.
             ??? Would it help to spin for a little while to see whether the
             current G1 gets closed?  This might be worthwhile if the group is
             small or close to being closed.  */
          unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g);
          while (__condvar_load_g1_start_relaxed (cond) == g1_start)
            {
              /* Try to add a signal.  We don't need to acquire the lock
                 because at worst we can cause a spurious wake-up.  If the
                 group is in the process of being closed (LSB is true), this
                 has an effect similar to us adding a signal.  */
              if (((s & 1) != 0)
                  || atomic_compare_exchange_weak_relaxed
                     (cond->__data.__g_signals + g, &s, s + 2))
                {
                  /* If we added a signal, we also need to add a wake-up on
                     the futex.  We also need to do that if we skipped adding
                     a signal because the group is being closed because
                     while __condvar_quiesce_and_switch_g1 could have closed
                     the group, it might still be waiting for futex waiters to
                     leave (and one of those waiters might be the one we stole
                     the signal from, which causes it to block using the
                     futex).  */
                  futex_wake (cond->__data.__g_signals + g, 1, private);
                  break;
                }
              /* TODO Back off.  */
            }
        }
    }

 done:

  /* Confirm that we have been woken.  We do that before acquiring the mutex
     to allow for execution of pthread_cond_destroy while having acquired the
     mutex.  */
  __condvar_confirm_wakeup (cond, private);

  /* Woken up; now re-acquire the mutex.  If this doesn't fail, return RESULT,
     which is set to ETIMEDOUT if a timeout occurred, or zero otherwise.  */
  err = __pthread_mutex_cond_lock (mutex);
  /* XXX Abort on errors that are disallowed by POSIX?  */
  return (err != 0) ? err : result;
}


/* See __pthread_cond_wait_common.  */
int
__pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex)
{
  /* clockid is unused when abstime is NULL.  */
  return __pthread_cond_wait_common (cond, mutex, 0, NULL);
}

/* See __pthread_cond_wait_common.  */
int
__pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
                          const struct timespec *abstime)
{
  /* Check parameter validity.  This should also tell the compiler that
     it can assume that abstime is not NULL.  */
  if (! valid_nanoseconds (abstime->tv_nsec))
    return EINVAL;

  /* Relaxed MO is sufficient because the clock ID bit is only modified
     in condition creation.  */
  unsigned int flags = atomic_load_relaxed (&cond->__data.__wrefs);
  clockid_t clockid = (flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK)
                      ? CLOCK_MONOTONIC : CLOCK_REALTIME;
  return __pthread_cond_wait_common (cond, mutex, clockid, abstime);
}
versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
                  GLIBC_2_3_2);
versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
                  GLIBC_2_3_2);

/* See __pthread_cond_wait_common.  */
int
__pthread_cond_clockwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
                          clockid_t clockid,
                          const struct timespec *abstime)
{
  /* Check parameter validity.  This should also tell the compiler that
     it can assume that abstime is not NULL.  */
  if (! valid_nanoseconds (abstime->tv_nsec))
    return EINVAL;

  if (!futex_abstimed_supported_clockid (clockid))
    return EINVAL;

  return __pthread_cond_wait_common (cond, mutex, clockid, abstime);
}
weak_alias (__pthread_cond_clockwait, pthread_cond_clockwait);
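
/* Illustration only (not part of this file's interface): a caller that wants
   a timeout measured against CLOCK_MONOTONIC passes an absolute deadline on
   that clock.  The names m, cond, and ready are placeholders:

     struct timespec deadline;
     clock_gettime (CLOCK_MONOTONIC, &deadline);
     deadline.tv_sec += 5;
     pthread_mutex_lock (&m);
     while (!ready)
       if (pthread_cond_clockwait (&cond, &m, CLOCK_MONOTONIC, &deadline)
           == ETIMEDOUT)
         break;
     pthread_mutex_unlock (&m);

   pthread_cond_timedwait above behaves the same way, except that the clock
   is the one fixed at condvar creation via pthread_condattr_setclock
   (CLOCK_REALTIME by default), as recorded in the __wrefs clock ID bit.  */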