1/*-------------------------------------------------------------------------
2 *
3 * condition_variable.c
4 * Implementation of condition variables. Condition variables provide
5 * a way for one process to wait until a specific condition occurs,
6 * without needing to know the specific identity of the process for
7 * which they are waiting. Waits for condition variables can be
8 * interrupted, unlike LWLock waits. Condition variables are safe
9 * to use within dynamic shared memory segments.
10 *
11 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
12 * Portions Copyright (c) 1994, Regents of the University of California
13 *
14 * src/backend/storage/lmgr/condition_variable.c
15 *
16 *-------------------------------------------------------------------------
17 */
18
19#include "postgres.h"
20
21#include "miscadmin.h"
22#include "storage/condition_variable.h"
23#include "storage/ipc.h"
24#include "storage/proc.h"
25#include "storage/proclist.h"
26#include "storage/spin.h"
27#include "utils/memutils.h"
28
/*
 * The condition variable this process is currently prepared to sleep on, or
 * NULL if none.  Initially, we are not prepared to sleep on any condition
 * variable.  Set by ConditionVariablePrepareToSleep and cleared by
 * ConditionVariableCancelSleep.
 */
static ConditionVariable *cv_sleep_target = NULL;

/*
 * Reusable WaitEventSet.  Created the first time this process calls
 * ConditionVariablePrepareToSleep and then used by every
 * ConditionVariableSleep wait; NULL until then.
 */
static WaitEventSet *cv_wait_event_set = NULL;
34
/*
 * Initialize a condition variable.
 *
 * cv: the condition variable to set up.
 *
 * This initializes the spinlock that guards the CV (cv->mutex) and resets
 * its list of waiting processes (cv->wakeup).  The other functions in this
 * file acquire that spinlock and manipulate that list, so they rely on both
 * having been initialized here.
 */
void
ConditionVariableInit(ConditionVariable *cv)
{
	SpinLockInit(&cv->mutex);
	proclist_init(&cv->wakeup);
}
44
45/*
46 * Prepare to wait on a given condition variable.
47 *
48 * This can optionally be called before entering a test/sleep loop.
49 * Doing so is more efficient if we'll need to sleep at least once.
50 * However, if the first test of the exit condition is likely to succeed,
51 * it's more efficient to omit the ConditionVariablePrepareToSleep call.
52 * See comments in ConditionVariableSleep for more detail.
53 *
54 * Caution: "before entering the loop" means you *must* test the exit
55 * condition between calling ConditionVariablePrepareToSleep and calling
56 * ConditionVariableSleep. If that is inconvenient, omit calling
57 * ConditionVariablePrepareToSleep.
58 */
59void
60ConditionVariablePrepareToSleep(ConditionVariable *cv)
61{
62 int pgprocno = MyProc->pgprocno;
63
64 /*
65 * If first time through in this process, create a WaitEventSet, which
66 * we'll reuse for all condition variable sleeps.
67 */
68 if (cv_wait_event_set == NULL)
69 {
70 WaitEventSet *new_event_set;
71
72 new_event_set = CreateWaitEventSet(TopMemoryContext, 2);
73 AddWaitEventToSet(new_event_set, WL_LATCH_SET, PGINVALID_SOCKET,
74 MyLatch, NULL);
75 AddWaitEventToSet(new_event_set, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
76 NULL, NULL);
77 /* Don't set cv_wait_event_set until we have a correct WES. */
78 cv_wait_event_set = new_event_set;
79 }
80
81 /*
82 * If some other sleep is already prepared, cancel it; this is necessary
83 * because we have just one static variable tracking the prepared sleep,
84 * and also only one cvWaitLink in our PGPROC. It's okay to do this
85 * because whenever control does return to the other test-and-sleep loop,
86 * its ConditionVariableSleep call will just re-establish that sleep as
87 * the prepared one.
88 */
89 if (cv_sleep_target != NULL)
90 ConditionVariableCancelSleep();
91
92 /* Record the condition variable on which we will sleep. */
93 cv_sleep_target = cv;
94
95 /*
96 * Reset my latch before adding myself to the queue, to ensure that we
97 * don't miss a wakeup that occurs immediately.
98 */
99 ResetLatch(MyLatch);
100
101 /* Add myself to the wait queue. */
102 SpinLockAcquire(&cv->mutex);
103 proclist_push_tail(&cv->wakeup, pgprocno, cvWaitLink);
104 SpinLockRelease(&cv->mutex);
105}
106
107/*
108 * Wait for the given condition variable to be signaled.
109 *
110 * This should be called in a predicate loop that tests for a specific exit
111 * condition and otherwise sleeps, like so:
112 *
113 * ConditionVariablePrepareToSleep(cv); // optional
114 * while (condition for which we are waiting is not true)
115 * ConditionVariableSleep(cv, wait_event_info);
116 * ConditionVariableCancelSleep();
117 *
118 * wait_event_info should be a value from one of the WaitEventXXX enums
119 * defined in pgstat.h. This controls the contents of pg_stat_activity's
120 * wait_event_type and wait_event columns while waiting.
121 */
122void
123ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
124{
125 WaitEvent event;
126 bool done = false;
127
128 /*
129 * If the caller didn't prepare to sleep explicitly, then do so now and
130 * return immediately. The caller's predicate loop should immediately
131 * call again if its exit condition is not yet met. This will result in
132 * the exit condition being tested twice before we first sleep. The extra
133 * test can be prevented by calling ConditionVariablePrepareToSleep(cv)
134 * first. Whether it's worth doing that depends on whether you expect the
135 * exit condition to be met initially, in which case skipping the prepare
136 * is recommended because it avoids manipulations of the wait list, or not
137 * met initially, in which case preparing first is better because it
138 * avoids one extra test of the exit condition.
139 *
140 * If we are currently prepared to sleep on some other CV, we just cancel
141 * that and prepare this one; see ConditionVariablePrepareToSleep.
142 */
143 if (cv_sleep_target != cv)
144 {
145 ConditionVariablePrepareToSleep(cv);
146 return;
147 }
148
149 do
150 {
151 CHECK_FOR_INTERRUPTS();
152
153 /*
154 * Wait for latch to be set. (If we're awakened for some other
155 * reason, the code below will cope anyway.)
156 */
157 (void) WaitEventSetWait(cv_wait_event_set, -1, &event, 1,
158 wait_event_info);
159
160 /* Reset latch before examining the state of the wait list. */
161 ResetLatch(MyLatch);
162
163 /*
164 * If this process has been taken out of the wait list, then we know
165 * that it has been signaled by ConditionVariableSignal (or
166 * ConditionVariableBroadcast), so we should return to the caller. But
167 * that doesn't guarantee that the exit condition is met, only that we
168 * ought to check it. So we must put the process back into the wait
169 * list, to ensure we don't miss any additional wakeup occurring while
170 * the caller checks its exit condition. We can take ourselves out of
171 * the wait list only when the caller calls
172 * ConditionVariableCancelSleep.
173 *
174 * If we're still in the wait list, then the latch must have been set
175 * by something other than ConditionVariableSignal; though we don't
176 * guarantee not to return spuriously, we'll avoid this obvious case.
177 */
178 SpinLockAcquire(&cv->mutex);
179 if (!proclist_contains(&cv->wakeup, MyProc->pgprocno, cvWaitLink))
180 {
181 done = true;
182 proclist_push_tail(&cv->wakeup, MyProc->pgprocno, cvWaitLink);
183 }
184 SpinLockRelease(&cv->mutex);
185 } while (!done);
186}
187
188/*
189 * Cancel any pending sleep operation.
190 *
191 * We just need to remove ourselves from the wait queue of any condition
192 * variable for which we have previously prepared a sleep.
193 *
194 * Do nothing if nothing is pending; this allows this function to be called
195 * during transaction abort to clean up any unfinished CV sleep.
196 */
197void
198ConditionVariableCancelSleep(void)
199{
200 ConditionVariable *cv = cv_sleep_target;
201
202 if (cv == NULL)
203 return;
204
205 SpinLockAcquire(&cv->mutex);
206 if (proclist_contains(&cv->wakeup, MyProc->pgprocno, cvWaitLink))
207 proclist_delete(&cv->wakeup, MyProc->pgprocno, cvWaitLink);
208 SpinLockRelease(&cv->mutex);
209
210 cv_sleep_target = NULL;
211}
212
213/*
214 * Wake up the oldest process sleeping on the CV, if there is any.
215 *
216 * Note: it's difficult to tell whether this has any real effect: we know
217 * whether we took an entry off the list, but the entry might only be a
218 * sentinel. Hence, think twice before proposing that this should return
219 * a flag telling whether it woke somebody.
220 */
221void
222ConditionVariableSignal(ConditionVariable *cv)
223{
224 PGPROC *proc = NULL;
225
226 /* Remove the first process from the wakeup queue (if any). */
227 SpinLockAcquire(&cv->mutex);
228 if (!proclist_is_empty(&cv->wakeup))
229 proc = proclist_pop_head_node(&cv->wakeup, cvWaitLink);
230 SpinLockRelease(&cv->mutex);
231
232 /* If we found someone sleeping, set their latch to wake them up. */
233 if (proc != NULL)
234 SetLatch(&proc->procLatch);
235}
236
/*
 * Wake up all processes sleeping on the given CV.
 *
 * cv: the condition variable to broadcast on.
 *
 * This guarantees to wake all processes that were sleeping on the CV
 * at time of call, but processes that add themselves to the list mid-call
 * will typically not get awakened.
 *
 * As a side effect, any CV sleep this process had prepared is cancelled
 * (see below); the next ConditionVariableSleep call re-prepares it.
 */
void
ConditionVariableBroadcast(ConditionVariable *cv)
{
	int pgprocno = MyProc->pgprocno;
	PGPROC *proc = NULL;
	bool have_sentinel = false;

	/*
	 * In some use-cases, it is common for awakened processes to immediately
	 * re-queue themselves.  If we just naively try to reduce the wakeup list
	 * to empty, we'll get into a potentially-indefinite loop against such a
	 * process.  The semantics we really want are just to be sure that we have
	 * wakened all processes that were in the list at entry.  We can use our
	 * own cvWaitLink as a sentinel to detect when we've finished.
	 *
	 * A seeming flaw in this approach is that someone else might signal the
	 * CV and in doing so remove our sentinel entry.  But that's fine: since
	 * CV waiters are always added and removed in order, that must mean that
	 * every previous waiter has been wakened, so we're done.  We'll get an
	 * extra "set" on our latch from that other process's signal, which is
	 * slightly inefficient but harmless.
	 *
	 * We can't insert our cvWaitLink as a sentinel if it's already in use in
	 * some other proclist.  While that's not expected to be true for typical
	 * uses of this function, we can deal with it by simply canceling any
	 * prepared CV sleep.  The next call to ConditionVariableSleep will take
	 * care of re-establishing the lost state.
	 */
	if (cv_sleep_target != NULL)
		ConditionVariableCancelSleep();

	/*
	 * Inspect the state of the queue.  If it's empty, we have nothing to do.
	 * If there's exactly one entry, we need only remove and signal that
	 * entry.  Otherwise, remove the first entry and insert our sentinel at
	 * the tail, so that everything queued ahead of it is exactly the set of
	 * waiters present at entry.
	 */
	SpinLockAcquire(&cv->mutex);
	/* While we're here, let's assert we're not in the list. */
	Assert(!proclist_contains(&cv->wakeup, pgprocno, cvWaitLink));

	if (!proclist_is_empty(&cv->wakeup))
	{
		proc = proclist_pop_head_node(&cv->wakeup, cvWaitLink);
		if (!proclist_is_empty(&cv->wakeup))
		{
			proclist_push_tail(&cv->wakeup, pgprocno, cvWaitLink);
			have_sentinel = true;
		}
	}
	SpinLockRelease(&cv->mutex);

	/* Awaken first waiter, if there was one (latch is set outside the lock). */
	if (proc != NULL)
		SetLatch(&proc->procLatch);

	while (have_sentinel)
	{
		/*
		 * Each time through the loop, remove the first wakeup list entry, and
		 * signal it unless it's our sentinel.  Repeat as long as the sentinel
		 * remains in the list.
		 *
		 * Notice that if someone else removes our sentinel, we will waken one
		 * additional process before exiting.  That's intentional, because if
		 * someone else signals the CV, they may be intending to waken some
		 * third process that added itself to the list after we added the
		 * sentinel.  Better to give a spurious wakeup (which should be
		 * harmless beyond wasting some cycles) than to lose a wakeup.
		 */
		proc = NULL;
		SpinLockAcquire(&cv->mutex);
		if (!proclist_is_empty(&cv->wakeup))
			proc = proclist_pop_head_node(&cv->wakeup, cvWaitLink);
		/* Re-check after the pop: false once the sentinel itself is gone. */
		have_sentinel = proclist_contains(&cv->wakeup, pgprocno, cvWaitLink);
		SpinLockRelease(&cv->mutex);

		/* Popping our own sentinel yields MyProc; don't signal ourselves. */
		if (proc != NULL && proc != MyProc)
			SetLatch(&proc->procLatch);
	}
}
324