| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * proc.c |
| 4 | * routines to manage per-process shared memory data structure |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * Portions Copyright (c) 1994, Regents of the University of California |
| 8 | * |
| 9 | * |
| 10 | * IDENTIFICATION |
| 11 | * src/backend/storage/lmgr/proc.c |
| 12 | * |
| 13 | *------------------------------------------------------------------------- |
| 14 | */ |
| 15 | /* |
| 16 | * Interface (a): |
| 17 | * ProcSleep(), ProcWakeup(), |
| 18 | * ProcQueueAlloc() -- create a shm queue for sleeping processes |
| 19 | * ProcQueueInit() -- create a queue without allocing memory |
| 20 | * |
| 21 | * Waiting for a lock causes the backend to be put to sleep. Whoever releases |
| 22 | * the lock wakes the process up again (and gives it an error code so it knows |
| 23 | * whether it was awoken on an error condition). |
| 24 | * |
| 25 | * Interface (b): |
| 26 | * |
| 27 | * ProcReleaseLocks -- frees the locks associated with current transaction |
| 28 | * |
| 29 | * ProcKill -- destroys the shared memory state (and locks) |
| 30 | * associated with the process. |
| 31 | */ |
| 32 | #include "postgres.h" |
| 33 | |
| 34 | #include <signal.h> |
| 35 | #include <unistd.h> |
| 36 | #include <sys/time.h> |
| 37 | |
| 38 | #include "access/transam.h" |
| 39 | #include "access/twophase.h" |
| 40 | #include "access/xact.h" |
| 41 | #include "miscadmin.h" |
| 42 | #include "pgstat.h" |
| 43 | #include "postmaster/autovacuum.h" |
| 44 | #include "replication/slot.h" |
| 45 | #include "replication/syncrep.h" |
| 46 | #include "replication/walsender.h" |
| 47 | #include "storage/condition_variable.h" |
| 48 | #include "storage/standby.h" |
| 49 | #include "storage/ipc.h" |
| 50 | #include "storage/lmgr.h" |
| 51 | #include "storage/pmsignal.h" |
| 52 | #include "storage/proc.h" |
| 53 | #include "storage/procarray.h" |
| 54 | #include "storage/procsignal.h" |
| 55 | #include "storage/spin.h" |
| 56 | #include "utils/timeout.h" |
| 57 | #include "utils/timestamp.h" |
| 58 | |
| 59 | |
| 60 | /* GUC variables */ |
| 61 | int DeadlockTimeout = 1000; |
| 62 | int StatementTimeout = 0; |
| 63 | int LockTimeout = 0; |
| 64 | int IdleInTransactionSessionTimeout = 0; |
| 65 | bool log_lock_waits = false; |
| 66 | |
| 67 | /* Pointer to this process's PGPROC and PGXACT structs, if any */ |
| 68 | PGPROC *MyProc = NULL; |
| 69 | PGXACT *MyPgXact = NULL; |
| 70 | |
| 71 | /* |
| 72 | * This spinlock protects the freelist of recycled PGPROC structures. |
| 73 | * We cannot use an LWLock because the LWLock manager depends on already |
| 74 | * having a PGPROC and a wait semaphore! But these structures are touched |
| 75 | * relatively infrequently (only at backend startup or shutdown) and not for |
| 76 | * very long, so a spinlock is okay. |
| 77 | */ |
| 78 | NON_EXEC_STATIC slock_t *ProcStructLock = NULL; |
| 79 | |
| 80 | /* Pointers to shared-memory structures */ |
| 81 | PROC_HDR *ProcGlobal = NULL; |
| 82 | NON_EXEC_STATIC PGPROC *AuxiliaryProcs = NULL; |
| 83 | PGPROC *PreparedXactProcs = NULL; |
| 84 | |
| 85 | /* If we are waiting for a lock, this points to the associated LOCALLOCK */ |
| 86 | static LOCALLOCK *lockAwaited = NULL; |
| 87 | |
| 88 | static DeadLockState deadlock_state = DS_NOT_YET_CHECKED; |
| 89 | |
| 90 | /* Is a deadlock check pending? */ |
| 91 | static volatile sig_atomic_t got_deadlock_timeout; |
| 92 | |
| 93 | static void RemoveProcFromArray(int code, Datum arg); |
| 94 | static void ProcKill(int code, Datum arg); |
| 95 | static void AuxiliaryProcKill(int code, Datum arg); |
| 96 | static void CheckDeadLock(void); |
| 97 | |
| 98 | |
| 99 | /* |
| 100 | * Report shared-memory space needed by InitProcGlobal. |
| 101 | */ |
| 102 | Size |
| 103 | ProcGlobalShmemSize(void) |
| 104 | { |
| 105 | Size size = 0; |
| 106 | |
| 107 | /* ProcGlobal */ |
| 108 | size = add_size(size, sizeof(PROC_HDR)); |
| 109 | /* MyProcs, including autovacuum workers and launcher */ |
| 110 | size = add_size(size, mul_size(MaxBackends, sizeof(PGPROC))); |
| 111 | /* AuxiliaryProcs */ |
| 112 | size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGPROC))); |
| 113 | /* Prepared xacts */ |
| 114 | size = add_size(size, mul_size(max_prepared_xacts, sizeof(PGPROC))); |
| 115 | /* ProcStructLock */ |
| 116 | size = add_size(size, sizeof(slock_t)); |
| 117 | |
| 118 | size = add_size(size, mul_size(MaxBackends, sizeof(PGXACT))); |
| 119 | size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGXACT))); |
| 120 | size = add_size(size, mul_size(max_prepared_xacts, sizeof(PGXACT))); |
| 121 | |
| 122 | return size; |
| 123 | } |
| 124 | |
| 125 | /* |
| 126 | * Report number of semaphores needed by InitProcGlobal. |
| 127 | */ |
| 128 | int |
| 129 | ProcGlobalSemas(void) |
| 130 | { |
| 131 | /* |
| 132 | * We need a sema per backend (including autovacuum), plus one for each |
| 133 | * auxiliary process. |
| 134 | */ |
| 135 | return MaxBackends + NUM_AUXILIARY_PROCS; |
| 136 | } |
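
/*
 * Illustrative sketch, not part of this file: the two reporting functions
 * above are consumed during startup, before InitProcGlobal() runs.  The
 * call sites below are an assumption modeled on the usual
 * CreateSharedMemoryAndSemaphores() flow in ipci.c; exact signatures may
 * differ across versions:
 *
 *		size = add_size(size, ProcGlobalShmemSize());
 *		numSemas = ProcGlobalSemas();
 *		PGReserveSemaphores(numSemas);
 *
 * This is why both functions must be kept in sync with the allocations
 * actually performed in InitProcGlobal() below.
 */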
| 137 | |
| 138 | /* |
| 139 | * InitProcGlobal - |
| 140 | * Initialize the global process table during postmaster or standalone |
| 141 | * backend startup. |
| 142 | * |
| 143 | * We also create all the per-process semaphores we will need to support |
| 144 | * the requested number of backends. We used to allocate semaphores |
| 145 | * only when backends were actually started up, but that is bad because |
| 146 | * it lets Postgres fail under load --- a lot of Unix systems are |
| 147 | * (mis)configured with small limits on the number of semaphores, and |
| 148 | * running out when trying to start another backend is a common failure. |
| 149 | * So, now we grab enough semaphores to support the desired max number |
| 150 | * of backends immediately at initialization --- if the sysadmin has set |
| 151 | * MaxConnections, max_worker_processes, max_wal_senders, or |
| 152 | * autovacuum_max_workers higher than his kernel will support, he'll |
| 153 | * find out sooner rather than later. |
| 154 | * |
| 155 | * Another reason for creating semaphores here is that the semaphore |
| 156 | * implementation typically requires us to create semaphores in the |
| 157 | * postmaster, not in backends. |
| 158 | * |
| 159 | * Note: this is NOT called by individual backends under a postmaster, |
| 160 | * not even in the EXEC_BACKEND case. The ProcGlobal and AuxiliaryProcs |
| 161 | * pointers must be propagated specially for EXEC_BACKEND operation. |
| 162 | */ |
| 163 | void |
| 164 | InitProcGlobal(void) |
| 165 | { |
| 166 | PGPROC *procs; |
| 167 | PGXACT *pgxacts; |
| 168 | int i, |
| 169 | j; |
| 170 | bool found; |
| 171 | uint32 TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS + max_prepared_xacts; |
| 172 | |
| 173 | /* Create the ProcGlobal shared structure */ |
| 174 | ProcGlobal = (PROC_HDR *) |
| 175 | ShmemInitStruct("Proc Header" , sizeof(PROC_HDR), &found); |
| 176 | Assert(!found); |
| 177 | |
| 178 | /* |
| 179 | * Initialize the data structures. |
| 180 | */ |
| 181 | ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY; |
| 182 | ProcGlobal->freeProcs = NULL; |
| 183 | ProcGlobal->autovacFreeProcs = NULL; |
| 184 | ProcGlobal->bgworkerFreeProcs = NULL; |
| 185 | ProcGlobal->walsenderFreeProcs = NULL; |
| 186 | ProcGlobal->startupProc = NULL; |
| 187 | ProcGlobal->startupProcPid = 0; |
| 188 | ProcGlobal->startupBufferPinWaitBufId = -1; |
| 189 | ProcGlobal->walwriterLatch = NULL; |
| 190 | ProcGlobal->checkpointerLatch = NULL; |
| 191 | pg_atomic_init_u32(&ProcGlobal->procArrayGroupFirst, INVALID_PGPROCNO); |
| 192 | pg_atomic_init_u32(&ProcGlobal->clogGroupFirst, INVALID_PGPROCNO); |
| 193 | |
| 194 | /* |
| 195 | * Create and initialize all the PGPROC structures we'll need. There are |
| 196 | * five separate consumers: (1) normal backends, (2) autovacuum workers |
| 197 | * and the autovacuum launcher, (3) background workers, (4) auxiliary |
| 198 | * processes, and (5) prepared transactions. Each PGPROC structure is |
| 199 | * dedicated to exactly one of these purposes, and they do not move |
| 200 | * between groups. |
| 201 | */ |
| 202 | procs = (PGPROC *) ShmemAlloc(TotalProcs * sizeof(PGPROC)); |
| 203 | MemSet(procs, 0, TotalProcs * sizeof(PGPROC)); |
| 204 | ProcGlobal->allProcs = procs; |
| 205 | /* XXX allProcCount isn't really all of them; it excludes prepared xacts */ |
| 206 | ProcGlobal->allProcCount = MaxBackends + NUM_AUXILIARY_PROCS; |
| 207 | |
| 208 | /* |
| 209 | * Also allocate a separate array of PGXACT structures. This is separate |
| 210 | * from the main PGPROC array so that the most heavily accessed data is |
| 211 | * stored contiguously in memory in as few cache lines as possible. This |
| 212 | * provides significant performance benefits, especially on a |
| 213 | * multiprocessor system. There is one PGXACT structure for every PGPROC |
| 214 | * structure. |
| 215 | */ |
| 216 | pgxacts = (PGXACT *) ShmemAlloc(TotalProcs * sizeof(PGXACT)); |
| 217 | MemSet(pgxacts, 0, TotalProcs * sizeof(PGXACT)); |
| 218 | ProcGlobal->allPgXact = pgxacts; |
| 219 | |
| 220 | for (i = 0; i < TotalProcs; i++) |
| 221 | { |
| 222 | /* Common initialization for all PGPROCs, regardless of type. */ |
| 223 | |
| 224 | /* |
| 225 | * Set up per-PGPROC semaphore, latch, and backendLock. Prepared xact |
| 226 | * dummy PGPROCs don't need these though - they're never associated |
| 227 | * with a real process |
| 228 | */ |
| 229 | if (i < MaxBackends + NUM_AUXILIARY_PROCS) |
| 230 | { |
| 231 | procs[i].sem = PGSemaphoreCreate(); |
| 232 | InitSharedLatch(&(procs[i].procLatch)); |
| 233 | LWLockInitialize(&(procs[i].backendLock), LWTRANCHE_PROC); |
| 234 | } |
| 235 | procs[i].pgprocno = i; |
| 236 | |
| 237 | /* |
| 238 | * Newly created PGPROCs for normal backends, autovacuum and bgworkers |
| 239 | * must be queued up on the appropriate free list. Because there can |
| 240 | * only ever be a small, fixed number of auxiliary processes, no free |
| 241 | * list is used in that case; InitAuxiliaryProcess() instead uses a |
| 242 | * linear search. PGPROCs for prepared transactions are added to a |
| 243 | * free list by TwoPhaseShmemInit(). |
| 244 | */ |
| 245 | if (i < MaxConnections) |
| 246 | { |
| 247 | /* PGPROC for normal backend, add to freeProcs list */ |
| 248 | procs[i].links.next = (SHM_QUEUE *) ProcGlobal->freeProcs; |
| 249 | ProcGlobal->freeProcs = &procs[i]; |
| 250 | procs[i].procgloballist = &ProcGlobal->freeProcs; |
| 251 | } |
| 252 | else if (i < MaxConnections + autovacuum_max_workers + 1) |
| 253 | { |
| 254 | /* PGPROC for AV launcher/worker, add to autovacFreeProcs list */ |
| 255 | procs[i].links.next = (SHM_QUEUE *) ProcGlobal->autovacFreeProcs; |
| 256 | ProcGlobal->autovacFreeProcs = &procs[i]; |
| 257 | procs[i].procgloballist = &ProcGlobal->autovacFreeProcs; |
| 258 | } |
| 259 | else if (i < MaxConnections + autovacuum_max_workers + 1 + max_worker_processes) |
| 260 | { |
| 261 | /* PGPROC for bgworker, add to bgworkerFreeProcs list */ |
| 262 | procs[i].links.next = (SHM_QUEUE *) ProcGlobal->bgworkerFreeProcs; |
| 263 | ProcGlobal->bgworkerFreeProcs = &procs[i]; |
| 264 | procs[i].procgloballist = &ProcGlobal->bgworkerFreeProcs; |
| 265 | } |
| 266 | else if (i < MaxBackends) |
| 267 | { |
| 268 | /* PGPROC for walsender, add to walsenderFreeProcs list */ |
| 269 | procs[i].links.next = (SHM_QUEUE *) ProcGlobal->walsenderFreeProcs; |
| 270 | ProcGlobal->walsenderFreeProcs = &procs[i]; |
| 271 | procs[i].procgloballist = &ProcGlobal->walsenderFreeProcs; |
| 272 | } |
| 273 | |
| 274 | /* Initialize myProcLocks[] shared memory queues. */ |
| 275 | for (j = 0; j < NUM_LOCK_PARTITIONS; j++) |
| 276 | SHMQueueInit(&(procs[i].myProcLocks[j])); |
| 277 | |
| 278 | /* Initialize lockGroupMembers list. */ |
| 279 | dlist_init(&procs[i].lockGroupMembers); |
| 280 | |
		/*
		 * Initialize the atomic variables; otherwise it won't be safe to
		 * access them for backends that aren't currently in use.
		 */
| 285 | pg_atomic_init_u32(&(procs[i].procArrayGroupNext), INVALID_PGPROCNO); |
| 286 | pg_atomic_init_u32(&(procs[i].clogGroupNext), INVALID_PGPROCNO); |
| 287 | } |
| 288 | |
| 289 | /* |
| 290 | * Save pointers to the blocks of PGPROC structures reserved for auxiliary |
| 291 | * processes and prepared transactions. |
| 292 | */ |
| 293 | AuxiliaryProcs = &procs[MaxBackends]; |
| 294 | PreparedXactProcs = &procs[MaxBackends + NUM_AUXILIARY_PROCS]; |
| 295 | |
| 296 | /* Create ProcStructLock spinlock, too */ |
| 297 | ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t)); |
| 298 | SpinLockInit(ProcStructLock); |
| 299 | } |
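
/*
 * For orientation, the layout of the allProcs array created above, derived
 * directly from the index tests in the initialization loop (boundaries
 * follow the GUC values in effect at startup):
 *
 *	 0 .. MaxConnections-1						normal backends (freeProcs)
 *	 ... + autovacuum_max_workers + 1			autovacuum launcher and
 *												workers (autovacFreeProcs)
 *	 ... + max_worker_processes					background workers
 *												(bgworkerFreeProcs)
 *	 ... up to MaxBackends-1					walsenders (walsenderFreeProcs)
 *	 MaxBackends .. + NUM_AUXILIARY_PROCS-1		auxiliary processes
 *	 remainder up to TotalProcs-1				prepared transactions
 */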
| 300 | |
| 301 | /* |
| 302 | * InitProcess -- initialize a per-process data structure for this backend |
| 303 | */ |
| 304 | void |
| 305 | InitProcess(void) |
| 306 | { |
| 307 | PGPROC *volatile *procgloballist; |
| 308 | |
| 309 | /* |
| 310 | * ProcGlobal should be set up already (if we are a backend, we inherit |
| 311 | * this by fork() or EXEC_BACKEND mechanism from the postmaster). |
| 312 | */ |
| 313 | if (ProcGlobal == NULL) |
| 314 | elog(PANIC, "proc header uninitialized" ); |
| 315 | |
| 316 | if (MyProc != NULL) |
| 317 | elog(ERROR, "you already exist" ); |
| 318 | |
| 319 | /* Decide which list should supply our PGPROC. */ |
| 320 | if (IsAnyAutoVacuumProcess()) |
| 321 | procgloballist = &ProcGlobal->autovacFreeProcs; |
| 322 | else if (IsBackgroundWorker) |
| 323 | procgloballist = &ProcGlobal->bgworkerFreeProcs; |
| 324 | else if (am_walsender) |
| 325 | procgloballist = &ProcGlobal->walsenderFreeProcs; |
| 326 | else |
| 327 | procgloballist = &ProcGlobal->freeProcs; |
| 328 | |
| 329 | /* |
| 330 | * Try to get a proc struct from the appropriate free list. If this |
| 331 | * fails, we must be out of PGPROC structures (not to mention semaphores). |
| 332 | * |
| 333 | * While we are holding the ProcStructLock, also copy the current shared |
| 334 | * estimate of spins_per_delay to local storage. |
| 335 | */ |
| 336 | SpinLockAcquire(ProcStructLock); |
| 337 | |
| 338 | set_spins_per_delay(ProcGlobal->spins_per_delay); |
| 339 | |
| 340 | MyProc = *procgloballist; |
| 341 | |
| 342 | if (MyProc != NULL) |
| 343 | { |
| 344 | *procgloballist = (PGPROC *) MyProc->links.next; |
| 345 | SpinLockRelease(ProcStructLock); |
| 346 | } |
| 347 | else |
| 348 | { |
| 349 | /* |
| 350 | * If we reach here, all the PGPROCs are in use. This is one of the |
| 351 | * possible places to detect "too many backends", so give the standard |
| 352 | * error message. XXX do we need to give a different failure message |
| 353 | * in the autovacuum case? |
| 354 | */ |
| 355 | SpinLockRelease(ProcStructLock); |
| 356 | if (am_walsender) |
| 357 | ereport(FATAL, |
| 358 | (errcode(ERRCODE_TOO_MANY_CONNECTIONS), |
| 359 | errmsg("number of requested standby connections exceeds max_wal_senders (currently %d)" , |
| 360 | max_wal_senders))); |
| 361 | ereport(FATAL, |
| 362 | (errcode(ERRCODE_TOO_MANY_CONNECTIONS), |
| 363 | errmsg("sorry, too many clients already" ))); |
| 364 | } |
| 365 | MyPgXact = &ProcGlobal->allPgXact[MyProc->pgprocno]; |
| 366 | |
| 367 | /* |
| 368 | * Cross-check that the PGPROC is of the type we expect; if this were not |
| 369 | * the case, it would get returned to the wrong list. |
| 370 | */ |
| 371 | Assert(MyProc->procgloballist == procgloballist); |
| 372 | |
| 373 | /* |
| 374 | * Now that we have a PGPROC, mark ourselves as an active postmaster |
| 375 | * child; this is so that the postmaster can detect it if we exit without |
| 376 | * cleaning up. (XXX autovac launcher currently doesn't participate in |
| 377 | * this; it probably should.) |
| 378 | */ |
| 379 | if (IsUnderPostmaster && !IsAutoVacuumLauncherProcess()) |
| 380 | MarkPostmasterChildActive(); |
| 381 | |
| 382 | /* |
| 383 | * Initialize all fields of MyProc, except for those previously |
| 384 | * initialized by InitProcGlobal. |
| 385 | */ |
| 386 | SHMQueueElemInit(&(MyProc->links)); |
| 387 | MyProc->waitStatus = STATUS_OK; |
| 388 | MyProc->lxid = InvalidLocalTransactionId; |
| 389 | MyProc->fpVXIDLock = false; |
| 390 | MyProc->fpLocalTransactionId = InvalidLocalTransactionId; |
| 391 | MyPgXact->xid = InvalidTransactionId; |
| 392 | MyPgXact->xmin = InvalidTransactionId; |
| 393 | MyProc->pid = MyProcPid; |
| 394 | /* backendId, databaseId and roleId will be filled in later */ |
| 395 | MyProc->backendId = InvalidBackendId; |
| 396 | MyProc->databaseId = InvalidOid; |
| 397 | MyProc->roleId = InvalidOid; |
| 398 | MyProc->tempNamespaceId = InvalidOid; |
| 399 | MyProc->isBackgroundWorker = IsBackgroundWorker; |
| 400 | MyPgXact->delayChkpt = false; |
| 401 | MyPgXact->vacuumFlags = 0; |
| 402 | /* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */ |
| 403 | if (IsAutoVacuumWorkerProcess()) |
| 404 | MyPgXact->vacuumFlags |= PROC_IS_AUTOVACUUM; |
| 405 | MyProc->lwWaiting = false; |
| 406 | MyProc->lwWaitMode = 0; |
| 407 | MyProc->waitLock = NULL; |
| 408 | MyProc->waitProcLock = NULL; |
| 409 | #ifdef USE_ASSERT_CHECKING |
| 410 | { |
| 411 | int i; |
| 412 | |
| 413 | /* Last process should have released all locks. */ |
| 414 | for (i = 0; i < NUM_LOCK_PARTITIONS; i++) |
| 415 | Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i]))); |
| 416 | } |
| 417 | #endif |
| 418 | MyProc->recoveryConflictPending = false; |
| 419 | |
| 420 | /* Initialize fields for sync rep */ |
| 421 | MyProc->waitLSN = 0; |
| 422 | MyProc->syncRepState = SYNC_REP_NOT_WAITING; |
| 423 | SHMQueueElemInit(&(MyProc->syncRepLinks)); |
| 424 | |
| 425 | /* Initialize fields for group XID clearing. */ |
| 426 | MyProc->procArrayGroupMember = false; |
| 427 | MyProc->procArrayGroupMemberXid = InvalidTransactionId; |
| 428 | Assert(pg_atomic_read_u32(&MyProc->procArrayGroupNext) == INVALID_PGPROCNO); |
| 429 | |
| 430 | /* Check that group locking fields are in a proper initial state. */ |
| 431 | Assert(MyProc->lockGroupLeader == NULL); |
| 432 | Assert(dlist_is_empty(&MyProc->lockGroupMembers)); |
| 433 | |
| 434 | /* Initialize wait event information. */ |
| 435 | MyProc->wait_event_info = 0; |
| 436 | |
| 437 | /* Initialize fields for group transaction status update. */ |
| 438 | MyProc->clogGroupMember = false; |
| 439 | MyProc->clogGroupMemberXid = InvalidTransactionId; |
| 440 | MyProc->clogGroupMemberXidStatus = TRANSACTION_STATUS_IN_PROGRESS; |
| 441 | MyProc->clogGroupMemberPage = -1; |
| 442 | MyProc->clogGroupMemberLsn = InvalidXLogRecPtr; |
| 443 | Assert(pg_atomic_read_u32(&MyProc->clogGroupNext) == INVALID_PGPROCNO); |
| 444 | |
| 445 | /* |
| 446 | * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch |
| 447 | * on it. That allows us to repoint the process latch, which so far |
| 448 | * points to process local one, to the shared one. |
| 449 | */ |
| 450 | OwnLatch(&MyProc->procLatch); |
| 451 | SwitchToSharedLatch(); |
| 452 | |
| 453 | /* |
| 454 | * We might be reusing a semaphore that belonged to a failed process. So |
| 455 | * be careful and reinitialize its value here. (This is not strictly |
| 456 | * necessary anymore, but seems like a good idea for cleanliness.) |
| 457 | */ |
| 458 | PGSemaphoreReset(MyProc->sem); |
| 459 | |
| 460 | /* |
| 461 | * Arrange to clean up at backend exit. |
| 462 | */ |
| 463 | on_shmem_exit(ProcKill, 0); |
| 464 | |
| 465 | /* |
| 466 | * Now that we have a PGPROC, we could try to acquire locks, so initialize |
| 467 | * local state needed for LWLocks, and the deadlock checker. |
| 468 | */ |
| 469 | InitLWLockAccess(); |
| 470 | InitDeadLockChecking(); |
| 471 | } |
| 472 | |
| 473 | /* |
| 474 | * InitProcessPhase2 -- make MyProc visible in the shared ProcArray. |
| 475 | * |
| 476 | * This is separate from InitProcess because we can't acquire LWLocks until |
| 477 | * we've created a PGPROC, but in the EXEC_BACKEND case ProcArrayAdd won't |
| 478 | * work until after we've done CreateSharedMemoryAndSemaphores. |
| 479 | */ |
| 480 | void |
| 481 | InitProcessPhase2(void) |
| 482 | { |
| 483 | Assert(MyProc != NULL); |
| 484 | |
| 485 | /* |
| 486 | * Add our PGPROC to the PGPROC array in shared memory. |
| 487 | */ |
| 488 | ProcArrayAdd(MyProc); |
| 489 | |
| 490 | /* |
| 491 | * Arrange to clean that up at backend exit. |
| 492 | */ |
| 493 | on_shmem_exit(RemoveProcFromArray, 0); |
| 494 | } |
| 495 | |
| 496 | /* |
| 497 | * InitAuxiliaryProcess -- create a per-auxiliary-process data structure |
| 498 | * |
| 499 | * This is called by bgwriter and similar processes so that they will have a |
| 500 | * MyProc value that's real enough to let them wait for LWLocks. The PGPROC |
| 501 | * and sema that are assigned are one of the extra ones created during |
| 502 | * InitProcGlobal. |
| 503 | * |
| 504 | * Auxiliary processes are presently not expected to wait for real (lockmgr) |
| 505 | * locks, so we need not set up the deadlock checker. They are never added |
| 506 | * to the ProcArray or the sinval messaging mechanism, either. They also |
| 507 | * don't get a VXID assigned, since this is only useful when we actually |
| 508 | * hold lockmgr locks. |
| 509 | * |
 * The startup process, however, does use locks, but it never waits for them
 * in the normal backend sense.  It also takes part in sinval messaging as a
 * sendOnly process, so it never reads messages from the sinval queue.  Thus
 * the startup process does have a VXID and does show up in pg_locks.
| 514 | */ |
| 515 | void |
| 516 | InitAuxiliaryProcess(void) |
| 517 | { |
| 518 | PGPROC *auxproc; |
| 519 | int proctype; |
| 520 | |
| 521 | /* |
| 522 | * ProcGlobal should be set up already (if we are a backend, we inherit |
| 523 | * this by fork() or EXEC_BACKEND mechanism from the postmaster). |
| 524 | */ |
| 525 | if (ProcGlobal == NULL || AuxiliaryProcs == NULL) |
| 526 | elog(PANIC, "proc header uninitialized" ); |
| 527 | |
| 528 | if (MyProc != NULL) |
| 529 | elog(ERROR, "you already exist" ); |
| 530 | |
| 531 | /* |
| 532 | * We use the ProcStructLock to protect assignment and releasing of |
| 533 | * AuxiliaryProcs entries. |
| 534 | * |
| 535 | * While we are holding the ProcStructLock, also copy the current shared |
| 536 | * estimate of spins_per_delay to local storage. |
| 537 | */ |
| 538 | SpinLockAcquire(ProcStructLock); |
| 539 | |
| 540 | set_spins_per_delay(ProcGlobal->spins_per_delay); |
| 541 | |
| 542 | /* |
| 543 | * Find a free auxproc ... *big* trouble if there isn't one ... |
| 544 | */ |
| 545 | for (proctype = 0; proctype < NUM_AUXILIARY_PROCS; proctype++) |
| 546 | { |
| 547 | auxproc = &AuxiliaryProcs[proctype]; |
| 548 | if (auxproc->pid == 0) |
| 549 | break; |
| 550 | } |
| 551 | if (proctype >= NUM_AUXILIARY_PROCS) |
| 552 | { |
| 553 | SpinLockRelease(ProcStructLock); |
| 554 | elog(FATAL, "all AuxiliaryProcs are in use" ); |
| 555 | } |
| 556 | |
| 557 | /* Mark auxiliary proc as in use by me */ |
| 558 | /* use volatile pointer to prevent code rearrangement */ |
| 559 | ((volatile PGPROC *) auxproc)->pid = MyProcPid; |
| 560 | |
| 561 | MyProc = auxproc; |
| 562 | MyPgXact = &ProcGlobal->allPgXact[auxproc->pgprocno]; |
| 563 | |
| 564 | SpinLockRelease(ProcStructLock); |
| 565 | |
| 566 | /* |
| 567 | * Initialize all fields of MyProc, except for those previously |
| 568 | * initialized by InitProcGlobal. |
| 569 | */ |
| 570 | SHMQueueElemInit(&(MyProc->links)); |
| 571 | MyProc->waitStatus = STATUS_OK; |
| 572 | MyProc->lxid = InvalidLocalTransactionId; |
| 573 | MyProc->fpVXIDLock = false; |
| 574 | MyProc->fpLocalTransactionId = InvalidLocalTransactionId; |
| 575 | MyPgXact->xid = InvalidTransactionId; |
| 576 | MyPgXact->xmin = InvalidTransactionId; |
| 577 | MyProc->backendId = InvalidBackendId; |
| 578 | MyProc->databaseId = InvalidOid; |
| 579 | MyProc->roleId = InvalidOid; |
| 580 | MyProc->tempNamespaceId = InvalidOid; |
| 581 | MyProc->isBackgroundWorker = IsBackgroundWorker; |
| 582 | MyPgXact->delayChkpt = false; |
| 583 | MyPgXact->vacuumFlags = 0; |
| 584 | MyProc->lwWaiting = false; |
| 585 | MyProc->lwWaitMode = 0; |
| 586 | MyProc->waitLock = NULL; |
| 587 | MyProc->waitProcLock = NULL; |
| 588 | #ifdef USE_ASSERT_CHECKING |
| 589 | { |
| 590 | int i; |
| 591 | |
| 592 | /* Last process should have released all locks. */ |
| 593 | for (i = 0; i < NUM_LOCK_PARTITIONS; i++) |
| 594 | Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i]))); |
| 595 | } |
| 596 | #endif |
| 597 | |
| 598 | /* |
| 599 | * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch |
| 600 | * on it. That allows us to repoint the process latch, which so far |
| 601 | * points to process local one, to the shared one. |
| 602 | */ |
| 603 | OwnLatch(&MyProc->procLatch); |
| 604 | SwitchToSharedLatch(); |
| 605 | |
| 606 | /* Check that group locking fields are in a proper initial state. */ |
| 607 | Assert(MyProc->lockGroupLeader == NULL); |
| 608 | Assert(dlist_is_empty(&MyProc->lockGroupMembers)); |
| 609 | |
| 610 | /* |
| 611 | * We might be reusing a semaphore that belonged to a failed process. So |
| 612 | * be careful and reinitialize its value here. (This is not strictly |
| 613 | * necessary anymore, but seems like a good idea for cleanliness.) |
| 614 | */ |
| 615 | PGSemaphoreReset(MyProc->sem); |
| 616 | |
| 617 | /* |
| 618 | * Arrange to clean up at process exit. |
| 619 | */ |
| 620 | on_shmem_exit(AuxiliaryProcKill, Int32GetDatum(proctype)); |
| 621 | } |
| 622 | |
| 623 | /* |
| 624 | * Record the PID and PGPROC structures for the Startup process, for use in |
| 625 | * ProcSendSignal(). See comments there for further explanation. |
| 626 | */ |
| 627 | void |
| 628 | PublishStartupProcessInformation(void) |
| 629 | { |
| 630 | SpinLockAcquire(ProcStructLock); |
| 631 | |
| 632 | ProcGlobal->startupProc = MyProc; |
| 633 | ProcGlobal->startupProcPid = MyProcPid; |
| 634 | |
| 635 | SpinLockRelease(ProcStructLock); |
| 636 | } |
| 637 | |
/*
 * Used from bufmgr to share the value of the buffer that Startup waits on,
 * or to reset the value to "not waiting" (-1). This allows processing
 * of recovery conflicts for buffer pins. The set is made before backends look
 * at this value, so locking is not required, especially since the set is
 * an atomic integer set operation.
 */
| 645 | void |
| 646 | SetStartupBufferPinWaitBufId(int bufid) |
| 647 | { |
| 648 | /* use volatile pointer to prevent code rearrangement */ |
| 649 | volatile PROC_HDR *procglobal = ProcGlobal; |
| 650 | |
| 651 | procglobal->startupBufferPinWaitBufId = bufid; |
| 652 | } |
| 653 | |
| 654 | /* |
| 655 | * Used by backends when they receive a request to check for buffer pin waits. |
| 656 | */ |
| 657 | int |
| 658 | GetStartupBufferPinWaitBufId(void) |
| 659 | { |
| 660 | /* use volatile pointer to prevent code rearrangement */ |
| 661 | volatile PROC_HDR *procglobal = ProcGlobal; |
| 662 | |
| 663 | return procglobal->startupBufferPinWaitBufId; |
| 664 | } |
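
/*
 * Hedged sketch of the intended call pattern (an assumption modeled on the
 * buffer-pin-wait logic in bufmgr.c; the surrounding wait loop is elided):
 *
 *		SetStartupBufferPinWaitBufId(buffer - 1);	// announce wait target
 *		... sleep until the buffer pin is released ...
 *		SetStartupBufferPinWaitBufId(-1);			// back to "not waiting"
 *
 * Backends call GetStartupBufferPinWaitBufId() when asked to check whether
 * they are blocking the Startup process on a pinned buffer.
 */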
| 665 | |
| 666 | /* |
| 667 | * Check whether there are at least N free PGPROC objects. |
| 668 | * |
| 669 | * Note: this is designed on the assumption that N will generally be small. |
| 670 | */ |
| 671 | bool |
| 672 | HaveNFreeProcs(int n) |
| 673 | { |
| 674 | PGPROC *proc; |
| 675 | |
| 676 | SpinLockAcquire(ProcStructLock); |
| 677 | |
| 678 | proc = ProcGlobal->freeProcs; |
| 679 | |
| 680 | while (n > 0 && proc != NULL) |
| 681 | { |
| 682 | proc = (PGPROC *) proc->links.next; |
| 683 | n--; |
| 684 | } |
| 685 | |
| 686 | SpinLockRelease(ProcStructLock); |
| 687 | |
| 688 | return (n <= 0); |
| 689 | } |
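
/*
 * A usage sketch, assuming a caller along the lines of the reserved-slot
 * check made during backend startup (the test and message here are
 * illustrative, not authoritative):
 *
 *		if (!am_superuser && !HaveNFreeProcs(ReservedBackends))
 *			ereport(FATAL,
 *					(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
 *					 errmsg("remaining connection slots are reserved")));
 *
 * Since n is expected to be small, the linear list walk above stays cheap
 * even though it runs while holding the ProcStructLock spinlock.
 */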
| 690 | |
| 691 | /* |
| 692 | * Check if the current process is awaiting a lock. |
| 693 | */ |
| 694 | bool |
| 695 | IsWaitingForLock(void) |
| 696 | { |
| 697 | if (lockAwaited == NULL) |
| 698 | return false; |
| 699 | |
| 700 | return true; |
| 701 | } |
| 702 | |
| 703 | /* |
| 704 | * Cancel any pending wait for lock, when aborting a transaction, and revert |
| 705 | * any strong lock count acquisition for a lock being acquired. |
| 706 | * |
| 707 | * (Normally, this would only happen if we accept a cancel/die |
| 708 | * interrupt while waiting; but an ereport(ERROR) before or during the lock |
| 709 | * wait is within the realm of possibility, too.) |
| 710 | */ |
| 711 | void |
| 712 | LockErrorCleanup(void) |
| 713 | { |
| 714 | LWLock *partitionLock; |
| 715 | DisableTimeoutParams timeouts[2]; |
| 716 | |
| 717 | HOLD_INTERRUPTS(); |
| 718 | |
| 719 | AbortStrongLockAcquire(); |
| 720 | |
| 721 | /* Nothing to do if we weren't waiting for a lock */ |
| 722 | if (lockAwaited == NULL) |
| 723 | { |
| 724 | RESUME_INTERRUPTS(); |
| 725 | return; |
| 726 | } |
| 727 | |
| 728 | /* |
| 729 | * Turn off the deadlock and lock timeout timers, if they are still |
| 730 | * running (see ProcSleep). Note we must preserve the LOCK_TIMEOUT |
| 731 | * indicator flag, since this function is executed before |
| 732 | * ProcessInterrupts when responding to SIGINT; else we'd lose the |
| 733 | * knowledge that the SIGINT came from a lock timeout and not an external |
| 734 | * source. |
| 735 | */ |
| 736 | timeouts[0].id = DEADLOCK_TIMEOUT; |
| 737 | timeouts[0].keep_indicator = false; |
| 738 | timeouts[1].id = LOCK_TIMEOUT; |
| 739 | timeouts[1].keep_indicator = true; |
| 740 | disable_timeouts(timeouts, 2); |
| 741 | |
| 742 | /* Unlink myself from the wait queue, if on it (might not be anymore!) */ |
| 743 | partitionLock = LockHashPartitionLock(lockAwaited->hashcode); |
| 744 | LWLockAcquire(partitionLock, LW_EXCLUSIVE); |
| 745 | |
| 746 | if (MyProc->links.next != NULL) |
| 747 | { |
| 748 | /* We could not have been granted the lock yet */ |
| 749 | RemoveFromWaitQueue(MyProc, lockAwaited->hashcode); |
| 750 | } |
| 751 | else |
| 752 | { |
| 753 | /* |
| 754 | * Somebody kicked us off the lock queue already. Perhaps they |
| 755 | * granted us the lock, or perhaps they detected a deadlock. If they |
| 756 | * did grant us the lock, we'd better remember it in our local lock |
| 757 | * table. |
| 758 | */ |
| 759 | if (MyProc->waitStatus == STATUS_OK) |
| 760 | GrantAwaitedLock(); |
| 761 | } |
| 762 | |
| 763 | lockAwaited = NULL; |
| 764 | |
| 765 | LWLockRelease(partitionLock); |
| 766 | |
| 767 | RESUME_INTERRUPTS(); |
| 768 | } |
| 769 | |
| 770 | |
| 771 | /* |
| 772 | * ProcReleaseLocks() -- release locks associated with current transaction |
| 773 | * at main transaction commit or abort |
| 774 | * |
| 775 | * At main transaction commit, we release standard locks except session locks. |
| 776 | * At main transaction abort, we release all locks including session locks. |
| 777 | * |
| 778 | * Advisory locks are released only if they are transaction-level; |
| 779 | * session-level holds remain, whether this is a commit or not. |
| 780 | * |
| 781 | * At subtransaction commit, we don't release any locks (so this func is not |
| 782 | * needed at all); we will defer the releasing to the parent transaction. |
| 783 | * At subtransaction abort, we release all locks held by the subtransaction; |
| 784 | * this is implemented by retail releasing of the locks under control of |
| 785 | * the ResourceOwner mechanism. |
| 786 | */ |
| 787 | void |
| 788 | ProcReleaseLocks(bool isCommit) |
| 789 | { |
| 790 | if (!MyProc) |
| 791 | return; |
| 792 | /* If waiting, get off wait queue (should only be needed after error) */ |
| 793 | LockErrorCleanup(); |
| 794 | /* Release standard locks, including session-level if aborting */ |
| 795 | LockReleaseAll(DEFAULT_LOCKMETHOD, !isCommit); |
| 796 | /* Release transaction-level advisory locks */ |
| 797 | LockReleaseAll(USER_LOCKMETHOD, false); |
| 798 | } |
| 799 | |
| 800 | |
| 801 | /* |
| 802 | * RemoveProcFromArray() -- Remove this process from the shared ProcArray. |
| 803 | */ |
| 804 | static void |
| 805 | RemoveProcFromArray(int code, Datum arg) |
| 806 | { |
| 807 | Assert(MyProc != NULL); |
| 808 | ProcArrayRemove(MyProc, InvalidTransactionId); |
| 809 | } |
| 810 | |
| 811 | /* |
| 812 | * ProcKill() -- Destroy the per-proc data structure for |
| 813 | * this process. Release any of its held LW locks. |
| 814 | */ |
| 815 | static void |
| 816 | ProcKill(int code, Datum arg) |
| 817 | { |
| 818 | PGPROC *proc; |
| 819 | PGPROC *volatile *procgloballist; |
| 820 | |
| 821 | Assert(MyProc != NULL); |
| 822 | |
| 823 | /* Make sure we're out of the sync rep lists */ |
| 824 | SyncRepCleanupAtProcExit(); |
| 825 | |
| 826 | #ifdef USE_ASSERT_CHECKING |
| 827 | { |
| 828 | int i; |
| 829 | |
| 830 | /* Last process should have released all locks. */ |
| 831 | for (i = 0; i < NUM_LOCK_PARTITIONS; i++) |
| 832 | Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i]))); |
| 833 | } |
| 834 | #endif |
| 835 | |
| 836 | /* |
| 837 | * Release any LW locks I am holding. There really shouldn't be any, but |
| 838 | * it's cheap to check again before we cut the knees off the LWLock |
| 839 | * facility by releasing our PGPROC ... |
| 840 | */ |
| 841 | LWLockReleaseAll(); |
| 842 | |
| 843 | /* Cancel any pending condition variable sleep, too */ |
| 844 | ConditionVariableCancelSleep(); |
| 845 | |
| 846 | /* Make sure active replication slots are released */ |
| 847 | if (MyReplicationSlot != NULL) |
| 848 | ReplicationSlotRelease(); |
| 849 | |
| 850 | /* Also cleanup all the temporary slots. */ |
| 851 | ReplicationSlotCleanup(); |
| 852 | |
	/*
	 * Detach from any lock group of which we are a member. If the leader
	 * exits before all other group members, its PGPROC will remain allocated
	 * until the last group process exits; that process must return the
	 * leader's PGPROC to the appropriate list.
	 */
| 859 | if (MyProc->lockGroupLeader != NULL) |
| 860 | { |
| 861 | PGPROC *leader = MyProc->lockGroupLeader; |
| 862 | LWLock *leader_lwlock = LockHashPartitionLockByProc(leader); |
| 863 | |
| 864 | LWLockAcquire(leader_lwlock, LW_EXCLUSIVE); |
| 865 | Assert(!dlist_is_empty(&leader->lockGroupMembers)); |
| 866 | dlist_delete(&MyProc->lockGroupLink); |
| 867 | if (dlist_is_empty(&leader->lockGroupMembers)) |
| 868 | { |
| 869 | leader->lockGroupLeader = NULL; |
| 870 | if (leader != MyProc) |
| 871 | { |
| 872 | procgloballist = leader->procgloballist; |
| 873 | |
| 874 | /* Leader exited first; return its PGPROC. */ |
| 875 | SpinLockAcquire(ProcStructLock); |
| 876 | leader->links.next = (SHM_QUEUE *) *procgloballist; |
| 877 | *procgloballist = leader; |
| 878 | SpinLockRelease(ProcStructLock); |
| 879 | } |
| 880 | } |
| 881 | else if (leader != MyProc) |
| 882 | MyProc->lockGroupLeader = NULL; |
| 883 | LWLockRelease(leader_lwlock); |
| 884 | } |
| 885 | |
| 886 | /* |
| 887 | * Reset MyLatch to the process local one. This is so that signal |
| 888 | * handlers et al can continue using the latch after the shared latch |
| 889 | * isn't ours anymore. After that clear MyProc and disown the shared |
| 890 | * latch. |
| 891 | */ |
| 892 | SwitchBackToLocalLatch(); |
| 893 | proc = MyProc; |
| 894 | MyProc = NULL; |
| 895 | DisownLatch(&proc->procLatch); |
| 896 | |
| 897 | procgloballist = proc->procgloballist; |
| 898 | SpinLockAcquire(ProcStructLock); |
| 899 | |
| 900 | /* |
| 901 | * If we're still a member of a locking group, that means we're a leader |
| 902 | * which has somehow exited before its children. The last remaining child |
| 903 | * will release our PGPROC. Otherwise, release it now. |
| 904 | */ |
| 905 | if (proc->lockGroupLeader == NULL) |
| 906 | { |
| 907 | /* Since lockGroupLeader is NULL, lockGroupMembers should be empty. */ |
| 908 | Assert(dlist_is_empty(&proc->lockGroupMembers)); |
| 909 | |
| 910 | /* Return PGPROC structure (and semaphore) to appropriate freelist */ |
| 911 | proc->links.next = (SHM_QUEUE *) *procgloballist; |
| 912 | *procgloballist = proc; |
| 913 | } |
| 914 | |
| 915 | /* Update shared estimate of spins_per_delay */ |
| 916 | ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay); |
| 917 | |
| 918 | SpinLockRelease(ProcStructLock); |
| 919 | |
| 920 | /* |
| 921 | * This process is no longer present in shared memory in any meaningful |
| 922 | * way, so tell the postmaster we've cleaned up acceptably well. (XXX |
| 923 | * autovac launcher should be included here someday) |
| 924 | */ |
| 925 | if (IsUnderPostmaster && !IsAutoVacuumLauncherProcess()) |
| 926 | MarkPostmasterChildInactive(); |
| 927 | |
| 928 | /* wake autovac launcher if needed -- see comments in FreeWorkerInfo */ |
| 929 | if (AutovacuumLauncherPid != 0) |
| 930 | kill(AutovacuumLauncherPid, SIGUSR2); |
| 931 | } |
| 932 | |
| 933 | /* |
| 934 | * AuxiliaryProcKill() -- Cut-down version of ProcKill for auxiliary |
| 935 | * processes (bgwriter, etc). The PGPROC and sema are not released, only |
| 936 | * marked as not-in-use. |
| 937 | */ |
| 938 | static void |
| 939 | AuxiliaryProcKill(int code, Datum arg) |
| 940 | { |
| 941 | int proctype = DatumGetInt32(arg); |
| 942 | PGPROC *auxproc PG_USED_FOR_ASSERTS_ONLY; |
| 943 | PGPROC *proc; |
| 944 | |
| 945 | Assert(proctype >= 0 && proctype < NUM_AUXILIARY_PROCS); |
| 946 | |
| 947 | auxproc = &AuxiliaryProcs[proctype]; |
| 948 | |
| 949 | Assert(MyProc == auxproc); |
| 950 | |
| 951 | /* Release any LW locks I am holding (see notes above) */ |
| 952 | LWLockReleaseAll(); |
| 953 | |
| 954 | /* Cancel any pending condition variable sleep, too */ |
| 955 | ConditionVariableCancelSleep(); |
| 956 | |
| 957 | /* |
| 958 | * Reset MyLatch to the process local one. This is so that signal |
| 959 | * handlers et al can continue using the latch after the shared latch |
| 960 | * isn't ours anymore. After that clear MyProc and disown the shared |
| 961 | * latch. |
| 962 | */ |
| 963 | SwitchBackToLocalLatch(); |
| 964 | proc = MyProc; |
| 965 | MyProc = NULL; |
| 966 | DisownLatch(&proc->procLatch); |
| 967 | |
| 968 | SpinLockAcquire(ProcStructLock); |
| 969 | |
| 970 | /* Mark auxiliary proc no longer in use */ |
| 971 | proc->pid = 0; |
| 972 | |
| 973 | /* Update shared estimate of spins_per_delay */ |
| 974 | ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay); |
| 975 | |
| 976 | SpinLockRelease(ProcStructLock); |
| 977 | } |
| 978 | |
| 979 | /* |
| 980 | * AuxiliaryPidGetProc -- get PGPROC for an auxiliary process |
| 981 | * given its PID |
| 982 | * |
| 983 | * Returns NULL if not found. |
| 984 | */ |
| 985 | PGPROC * |
| 986 | AuxiliaryPidGetProc(int pid) |
| 987 | { |
| 988 | PGPROC *result = NULL; |
| 989 | int index; |
| 990 | |
| 991 | if (pid == 0) /* never match dummy PGPROCs */ |
| 992 | return NULL; |
| 993 | |
| 994 | for (index = 0; index < NUM_AUXILIARY_PROCS; index++) |
| 995 | { |
| 996 | PGPROC *proc = &AuxiliaryProcs[index]; |
| 997 | |
| 998 | if (proc->pid == pid) |
| 999 | { |
| 1000 | result = proc; |
| 1001 | break; |
| 1002 | } |
| 1003 | } |
| 1004 | return result; |
| 1005 | } |
| 1006 | |
| 1007 | /* |
| 1008 | * ProcQueue package: routines for putting processes to sleep |
| 1009 | * and waking them up |
| 1010 | */ |
| 1011 | |
| 1012 | /* |
| 1013 | * ProcQueueAlloc -- alloc/attach to a shared memory process queue |
| 1014 | * |
| 1015 | * Returns: a pointer to the queue |
| 1016 | * Side Effects: Initializes the queue if it wasn't there before |
| 1017 | */ |
| 1018 | #ifdef NOT_USED |
| 1019 | PROC_QUEUE * |
| 1020 | ProcQueueAlloc(const char *name) |
| 1021 | { |
| 1022 | PROC_QUEUE *queue; |
| 1023 | bool found; |
| 1024 | |
| 1025 | queue = (PROC_QUEUE *) |
| 1026 | ShmemInitStruct(name, sizeof(PROC_QUEUE), &found); |
| 1027 | |
| 1028 | if (!found) |
| 1029 | ProcQueueInit(queue); |
| 1030 | |
| 1031 | return queue; |
| 1032 | } |
| 1033 | #endif |
| 1034 | |
| 1035 | /* |
| 1036 | * ProcQueueInit -- initialize a shared memory process queue |
| 1037 | */ |
| 1038 | void |
| 1039 | ProcQueueInit(PROC_QUEUE *queue) |
| 1040 | { |
| 1041 | SHMQueueInit(&(queue->links)); |
| 1042 | queue->size = 0; |
| 1043 | } |
| 1044 | |
| 1045 | |
| 1046 | /* |
| 1047 | * ProcSleep -- put a process to sleep on the specified lock |
| 1048 | * |
| 1049 | * Caller must have set MyProc->heldLocks to reflect locks already held |
| 1050 | * on the lockable object by this process (under all XIDs). |
| 1051 | * |
| 1052 | * The lock table's partition lock must be held at entry, and will be held |
| 1053 | * at exit. |
| 1054 | * |
| 1055 | * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock). |
| 1056 | * |
| 1057 | * ASSUME: that no one will fiddle with the queue until after |
| 1058 | * we release the partition lock. |
| 1059 | * |
| 1060 | * NOTES: The process queue is now a priority queue for locking. |
| 1061 | */ |
| 1062 | int |
| 1063 | ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) |
| 1064 | { |
| 1065 | LOCKMODE lockmode = locallock->tag.mode; |
| 1066 | LOCK *lock = locallock->lock; |
| 1067 | PROCLOCK *proclock = locallock->proclock; |
| 1068 | uint32 hashcode = locallock->hashcode; |
| 1069 | LWLock *partitionLock = LockHashPartitionLock(hashcode); |
| 1070 | PROC_QUEUE *waitQueue = &(lock->waitProcs); |
| 1071 | LOCKMASK myHeldLocks = MyProc->heldLocks; |
| 1072 | bool early_deadlock = false; |
| 1073 | bool allow_autovacuum_cancel = true; |
| 1074 | int myWaitStatus; |
| 1075 | PGPROC *proc; |
| 1076 | PGPROC *leader = MyProc->lockGroupLeader; |
| 1077 | int i; |
| 1078 | |
| 1079 | /* |
| 1080 | * If group locking is in use, locks held by members of my locking group |
| 1081 | * need to be included in myHeldLocks. |
| 1082 | */ |
| 1083 | if (leader != NULL) |
| 1084 | { |
| 1085 | SHM_QUEUE *procLocks = &(lock->procLocks); |
| 1086 | PROCLOCK *otherproclock; |
| 1087 | |
| 1088 | otherproclock = (PROCLOCK *) |
| 1089 | SHMQueueNext(procLocks, procLocks, offsetof(PROCLOCK, lockLink)); |
| 1090 | while (otherproclock != NULL) |
| 1091 | { |
| 1092 | if (otherproclock->groupLeader == leader) |
| 1093 | myHeldLocks |= otherproclock->holdMask; |
| 1094 | otherproclock = (PROCLOCK *) |
| 1095 | SHMQueueNext(procLocks, &otherproclock->lockLink, |
| 1096 | offsetof(PROCLOCK, lockLink)); |
| 1097 | } |
| 1098 | } |
| 1099 | |
| 1100 | /* |
| 1101 | * Determine where to add myself in the wait queue. |
| 1102 | * |
| 1103 | * Normally I should go at the end of the queue. However, if I already |
| 1104 | * hold locks that conflict with the request of any previous waiter, put |
| 1105 | * myself in the queue just in front of the first such waiter. This is not |
| 1106 | * a necessary step, since deadlock detection would move me to before that |
| 1107 | * waiter anyway; but it's relatively cheap to detect such a conflict |
| 1108 | * immediately, and avoid delaying till deadlock timeout. |
| 1109 | * |
| 1110 | * Special case: if I find I should go in front of some waiter, check to |
| 1111 | * see if I conflict with already-held locks or the requests before that |
| 1112 | * waiter. If not, then just grant myself the requested lock immediately. |
| 1113 | * This is the same as the test for immediate grant in LockAcquire, except |
| 1114 | * we are only considering the part of the wait queue before my insertion |
| 1115 | * point. |
| 1116 | */ |
| 1117 | if (myHeldLocks != 0) |
| 1118 | { |
| 1119 | LOCKMASK aheadRequests = 0; |
| 1120 | |
| 1121 | proc = (PGPROC *) waitQueue->links.next; |
| 1122 | for (i = 0; i < waitQueue->size; i++) |
| 1123 | { |
| 1124 | /* |
| 1125 | * If we're part of the same locking group as this waiter, its |
| 1126 | * locks neither conflict with ours nor contribute to |
| 1127 | * aheadRequests. |
| 1128 | */ |
| 1129 | if (leader != NULL && leader == proc->lockGroupLeader) |
| 1130 | { |
| 1131 | proc = (PGPROC *) proc->links.next; |
| 1132 | continue; |
| 1133 | } |
| 1134 | /* Must he wait for me? */ |
| 1135 | if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks) |
| 1136 | { |
| 1137 | /* Must I wait for him ? */ |
| 1138 | if (lockMethodTable->conflictTab[lockmode] & proc->heldLocks) |
| 1139 | { |
| 1140 | /* |
| 1141 | * Yes, so we have a deadlock. Easiest way to clean up |
| 1142 | * correctly is to call RemoveFromWaitQueue(), but we |
| 1143 | * can't do that until we are *on* the wait queue. So, set |
| 1144 | * a flag to check below, and break out of loop. Also, |
| 1145 | * record deadlock info for later message. |
| 1146 | */ |
| 1147 | RememberSimpleDeadLock(MyProc, lockmode, lock, proc); |
| 1148 | early_deadlock = true; |
| 1149 | break; |
| 1150 | } |
| 1151 | /* I must go before this waiter. Check special case. */ |
| 1152 | if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 && |
| 1153 | LockCheckConflicts(lockMethodTable, |
| 1154 | lockmode, |
| 1155 | lock, |
| 1156 | proclock) == STATUS_OK) |
| 1157 | { |
| 1158 | /* Skip the wait and just grant myself the lock. */ |
| 1159 | GrantLock(lock, proclock, lockmode); |
| 1160 | GrantAwaitedLock(); |
| 1161 | return STATUS_OK; |
| 1162 | } |
| 1163 | /* Break out of loop to put myself before him */ |
| 1164 | break; |
| 1165 | } |
| 1166 | /* Nope, so advance to next waiter */ |
| 1167 | aheadRequests |= LOCKBIT_ON(proc->waitLockMode); |
| 1168 | proc = (PGPROC *) proc->links.next; |
| 1169 | } |
| 1170 | |
| 1171 | /* |
| 1172 | * If we fall out of loop normally, proc points to waitQueue head, so |
| 1173 | * we will insert at tail of queue as desired. |
| 1174 | */ |
| 1175 | } |
| 1176 | else |
| 1177 | { |
| 1178 | /* I hold no locks, so I can't push in front of anyone. */ |
| 1179 | proc = (PGPROC *) &(waitQueue->links); |
| 1180 | } |
| 1181 | |
| 1182 | /* |
| 1183 | * Insert self into queue, ahead of the given proc (or at tail of queue). |
| 1184 | */ |
| 1185 | SHMQueueInsertBefore(&(proc->links), &(MyProc->links)); |
| 1186 | waitQueue->size++; |
| 1187 | |
| 1188 | lock->waitMask |= LOCKBIT_ON(lockmode); |
| 1189 | |
| 1190 | /* Set up wait information in PGPROC object, too */ |
| 1191 | MyProc->waitLock = lock; |
| 1192 | MyProc->waitProcLock = proclock; |
| 1193 | MyProc->waitLockMode = lockmode; |
| 1194 | |
| 1195 | MyProc->waitStatus = STATUS_WAITING; |
| 1196 | |
| 1197 | /* |
| 1198 | * If we detected deadlock, give up without waiting. This must agree with |
| 1199 | * CheckDeadLock's recovery code. |
| 1200 | */ |
| 1201 | if (early_deadlock) |
| 1202 | { |
| 1203 | RemoveFromWaitQueue(MyProc, hashcode); |
| 1204 | return STATUS_ERROR; |
| 1205 | } |
| 1206 | |
| 1207 | /* mark that we are waiting for a lock */ |
| 1208 | lockAwaited = locallock; |
| 1209 | |
| 1210 | /* |
| 1211 | * Release the lock table's partition lock. |
| 1212 | * |
| 1213 | * NOTE: this may also cause us to exit critical-section state, possibly |
| 1214 | * allowing a cancel/die interrupt to be accepted. This is OK because we |
| 1215 | * have recorded the fact that we are waiting for a lock, and so |
| 1216 | * LockErrorCleanup will clean up if cancel/die happens. |
| 1217 | */ |
| 1218 | LWLockRelease(partitionLock); |
| 1219 | |
| 1220 | /* |
| 1221 | * Also, now that we will successfully clean up after an ereport, it's |
| 1222 | * safe to check to see if there's a buffer pin deadlock against the |
| 1223 | * Startup process. Of course, that's only necessary if we're doing Hot |
| 1224 | * Standby and are not the Startup process ourselves. |
| 1225 | */ |
| 1226 | if (RecoveryInProgress() && !InRecovery) |
| 1227 | CheckRecoveryConflictDeadlock(); |
| 1228 | |
| 1229 | /* Reset deadlock_state before enabling the timeout handler */ |
| 1230 | deadlock_state = DS_NOT_YET_CHECKED; |
| 1231 | got_deadlock_timeout = false; |
| 1232 | |
| 1233 | /* |
| 1234 | * Set timer so we can wake up after awhile and check for a deadlock. If a |
| 1235 | * deadlock is detected, the handler sets MyProc->waitStatus = |
| 1236 | * STATUS_ERROR, allowing us to know that we must report failure rather |
| 1237 | * than success. |
| 1238 | * |
| 1239 | * By delaying the check until we've waited for a bit, we can avoid |
| 1240 | * running the rather expensive deadlock-check code in most cases. |
| 1241 | * |
| 1242 | * If LockTimeout is set, also enable the timeout for that. We can save a |
| 1243 | * few cycles by enabling both timeout sources in one call. |
| 1244 | * |
| 1245 | * If InHotStandby we set lock waits slightly later for clarity with other |
| 1246 | * code. |
| 1247 | */ |
| 1248 | if (!InHotStandby) |
| 1249 | { |
| 1250 | if (LockTimeout > 0) |
| 1251 | { |
| 1252 | EnableTimeoutParams timeouts[2]; |
| 1253 | |
| 1254 | timeouts[0].id = DEADLOCK_TIMEOUT; |
| 1255 | timeouts[0].type = TMPARAM_AFTER; |
| 1256 | timeouts[0].delay_ms = DeadlockTimeout; |
| 1257 | timeouts[1].id = LOCK_TIMEOUT; |
| 1258 | timeouts[1].type = TMPARAM_AFTER; |
| 1259 | timeouts[1].delay_ms = LockTimeout; |
| 1260 | enable_timeouts(timeouts, 2); |
| 1261 | } |
| 1262 | else |
| 1263 | enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout); |
| 1264 | } |
| 1265 | |
| 1266 | /* |
| 1267 | * If somebody wakes us between LWLockRelease and WaitLatch, the latch |
| 1268 | * will not wait. But a set latch does not necessarily mean that the lock |
| 1269 | * is free now, as there are many other sources for latch sets than |
| 1270 | * somebody releasing the lock. |
| 1271 | * |
| 1272 | * We process interrupts whenever the latch has been set, so cancel/die |
| 1273 | * interrupts are processed quickly. This means we must not mind losing |
| 1274 | * control to a cancel/die interrupt here. We don't, because we have no |
| 1275 | * shared-state-change work to do after being granted the lock (the |
| 1276 | * grantor did it all). We do have to worry about canceling the deadlock |
| 1277 | * timeout and updating the locallock table, but if we lose control to an |
| 1278 | * error, LockErrorCleanup will fix that up. |
| 1279 | */ |
| 1280 | do |
| 1281 | { |
| 1282 | if (InHotStandby) |
| 1283 | { |
| 1284 | /* Set a timer and wait for that or for the Lock to be granted */ |
| 1285 | ResolveRecoveryConflictWithLock(locallock->tag.lock); |
| 1286 | } |
| 1287 | else |
| 1288 | { |
| 1289 | (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, 0, |
| 1290 | PG_WAIT_LOCK | locallock->tag.lock.locktag_type); |
| 1291 | ResetLatch(MyLatch); |
| 1292 | /* check for deadlocks first, as that's probably log-worthy */ |
| 1293 | if (got_deadlock_timeout) |
| 1294 | { |
| 1295 | CheckDeadLock(); |
| 1296 | got_deadlock_timeout = false; |
| 1297 | } |
| 1298 | CHECK_FOR_INTERRUPTS(); |
| 1299 | } |
| 1300 | |
| 1301 | /* |
| 1302 | * waitStatus could change from STATUS_WAITING to something else |
| 1303 | * asynchronously. Read it just once per loop to prevent surprising |
| 1304 | * behavior (such as missing log messages). |
| 1305 | */ |
| 1306 | myWaitStatus = *((volatile int *) &MyProc->waitStatus); |
| 1307 | |
| 1308 | /* |
| 1309 | * If we are not deadlocked, but are waiting on an autovacuum-induced |
| 1310 | * task, send a signal to interrupt it. |
| 1311 | */ |
| 1312 | if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM && allow_autovacuum_cancel) |
| 1313 | { |
| 1314 | PGPROC *autovac = GetBlockingAutoVacuumPgproc(); |
| 1315 | PGXACT *autovac_pgxact = &ProcGlobal->allPgXact[autovac->pgprocno]; |
| 1316 | |
| 1317 | LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); |
| 1318 | |
| 1319 | /* |
| 1320 | * Only do it if the worker is not working to protect against Xid |
| 1321 | * wraparound. |
| 1322 | */ |
| 1323 | if ((autovac_pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) && |
| 1324 | !(autovac_pgxact->vacuumFlags & PROC_VACUUM_FOR_WRAPAROUND)) |
| 1325 | { |
| 1326 | int pid = autovac->pid; |
| 1327 | StringInfoData locktagbuf; |
| 1328 | StringInfoData logbuf; /* errdetail for server log */ |
| 1329 | |
| 1330 | initStringInfo(&locktagbuf); |
| 1331 | initStringInfo(&logbuf); |
| 1332 | DescribeLockTag(&locktagbuf, &lock->tag); |
| 1333 | appendStringInfo(&logbuf, |
| 1334 | _("Process %d waits for %s on %s." ), |
| 1335 | MyProcPid, |
| 1336 | GetLockmodeName(lock->tag.locktag_lockmethodid, |
| 1337 | lockmode), |
| 1338 | locktagbuf.data); |
| 1339 | |
| 1340 | /* release lock as quickly as possible */ |
| 1341 | LWLockRelease(ProcArrayLock); |
| 1342 | |
| 1343 | /* send the autovacuum worker Back to Old Kent Road */ |
| 1344 | ereport(DEBUG1, |
| 1345 | (errmsg("sending cancel to blocking autovacuum PID %d" , |
| 1346 | pid), |
| 1347 | errdetail_log("%s" , logbuf.data))); |
| 1348 | |
| 1349 | if (kill(pid, SIGINT) < 0) |
| 1350 | { |
| 1351 | /* |
| 1352 | * There's a race condition here: once we release the |
| 1353 | * ProcArrayLock, it's possible for the autovac worker to |
| 1354 | * close up shop and exit before we can do the kill(). |
| 1355 | * Therefore, we do not whinge about no-such-process. |
| 1356 | * Other errors such as EPERM could conceivably happen if |
| 1357 | * the kernel recycles the PID fast enough, but such cases |
| 1358 | * seem improbable enough that it's probably best to issue |
| 1359 | * a warning if we see some other errno. |
| 1360 | */ |
| 1361 | if (errno != ESRCH) |
| 1362 | ereport(WARNING, |
| 1363 | (errmsg("could not send signal to process %d: %m" , |
| 1364 | pid))); |
| 1365 | } |
| 1366 | |
| 1367 | pfree(logbuf.data); |
| 1368 | pfree(locktagbuf.data); |
| 1369 | } |
| 1370 | else |
| 1371 | LWLockRelease(ProcArrayLock); |
| 1372 | |
| 1373 | /* prevent signal from being resent more than once */ |
| 1374 | allow_autovacuum_cancel = false; |
| 1375 | } |
| 1376 | |
| 1377 | /* |
| 1378 | * If awoken after the deadlock check interrupt has run, and |
| 1379 | * log_lock_waits is on, then report about the wait. |
| 1380 | */ |
| 1381 | if (log_lock_waits && deadlock_state != DS_NOT_YET_CHECKED) |
| 1382 | { |
| 1383 | StringInfoData buf, |
| 1384 | lock_waiters_sbuf, |
| 1385 | lock_holders_sbuf; |
| 1386 | const char *modename; |
| 1387 | long secs; |
| 1388 | int usecs; |
| 1389 | long msecs; |
| 1390 | SHM_QUEUE *procLocks; |
| 1391 | PROCLOCK *proclock; |
| 1392 | bool first_holder = true, |
| 1393 | first_waiter = true; |
| 1394 | int lockHoldersNum = 0; |
| 1395 | |
| 1396 | initStringInfo(&buf); |
| 1397 | initStringInfo(&lock_waiters_sbuf); |
| 1398 | initStringInfo(&lock_holders_sbuf); |
| 1399 | |
| 1400 | DescribeLockTag(&buf, &locallock->tag.lock); |
| 1401 | modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid, |
| 1402 | lockmode); |
| 1403 | TimestampDifference(get_timeout_start_time(DEADLOCK_TIMEOUT), |
| 1404 | GetCurrentTimestamp(), |
| 1405 | &secs, &usecs); |
| 1406 | msecs = secs * 1000 + usecs / 1000; |
| 1407 | usecs = usecs % 1000; |
| 1408 | |
| 1409 | /* |
| 1410 | * we loop over the lock's procLocks to gather a list of all |
| 1411 | * holders and waiters. Thus we will be able to provide more |
| 1412 | * detailed information for lock debugging purposes. |
| 1413 | * |
| 1414 | * lock->procLocks contains all processes which hold or wait for |
| 1415 | * this lock. |
| 1416 | */ |
| 1417 | |
| 1418 | LWLockAcquire(partitionLock, LW_SHARED); |
| 1419 | |
| 1420 | procLocks = &(lock->procLocks); |
| 1421 | proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, |
| 1422 | offsetof(PROCLOCK, lockLink)); |
| 1423 | |
| 1424 | while (proclock) |
| 1425 | { |
| 1426 | /* |
| 1427 | * we are a waiter if myProc->waitProcLock == proclock; we are |
| 1428 | * a holder if it is NULL or something different |
| 1429 | */ |
| 1430 | if (proclock->tag.myProc->waitProcLock == proclock) |
| 1431 | { |
| 1432 | if (first_waiter) |
| 1433 | { |
| 1434 | appendStringInfo(&lock_waiters_sbuf, "%d" , |
| 1435 | proclock->tag.myProc->pid); |
| 1436 | first_waiter = false; |
| 1437 | } |
| 1438 | else |
| 1439 | appendStringInfo(&lock_waiters_sbuf, ", %d" , |
| 1440 | proclock->tag.myProc->pid); |
| 1441 | } |
| 1442 | else |
| 1443 | { |
| 1444 | if (first_holder) |
| 1445 | { |
| 1446 | appendStringInfo(&lock_holders_sbuf, "%d" , |
| 1447 | proclock->tag.myProc->pid); |
| 1448 | first_holder = false; |
| 1449 | } |
| 1450 | else |
| 1451 | appendStringInfo(&lock_holders_sbuf, ", %d" , |
| 1452 | proclock->tag.myProc->pid); |
| 1453 | |
| 1454 | lockHoldersNum++; |
| 1455 | } |
| 1456 | |
| 1457 | proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->lockLink, |
| 1458 | offsetof(PROCLOCK, lockLink)); |
| 1459 | } |
| 1460 | |
| 1461 | LWLockRelease(partitionLock); |
| 1462 | |
| 1463 | if (deadlock_state == DS_SOFT_DEADLOCK) |
				ereport(LOG,
						(errmsg("process %d avoided deadlock for %s on %s by rearranging queue order after %ld.%03d ms",
								MyProcPid, modename, buf.data, msecs, usecs),
						 (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.",
											   "Processes holding the lock: %s. Wait queue: %s.",
											   lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data))));
| 1470 | else if (deadlock_state == DS_HARD_DEADLOCK) |
| 1471 | { |
| 1472 | /* |
| 1473 | * This message is a bit redundant with the error that will be |
| 1474 | * reported subsequently, but in some cases the error report |
| 1475 | * might not make it to the log (eg, if it's caught by an |
| 1476 | * exception handler), and we want to ensure all long-wait |
| 1477 | * events get logged. |
| 1478 | */ |
| 1479 | ereport(LOG, |
| 1480 | (errmsg("process %d detected deadlock while waiting for %s on %s after %ld.%03d ms" , |
| 1481 | MyProcPid, modename, buf.data, msecs, usecs), |
| 1482 | (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.", |
| 1483 | "Processes holding the lock: %s. Wait queue: %s.", |
| 1484 | lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data)))); |
| 1485 | } |
| 1486 | |
| 1487 | if (myWaitStatus == STATUS_WAITING) |
| 1488 | ereport(LOG, |
| 1489 | (errmsg("process %d still waiting for %s on %s after %ld.%03d ms" , |
| 1490 | MyProcPid, modename, buf.data, msecs, usecs), |
| 1491 | (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.", |
| 1492 | "Processes holding the lock: %s. Wait queue: %s.", |
| 1493 | lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data)))); |
| 1494 | else if (myWaitStatus == STATUS_OK) |
| 1495 | ereport(LOG, |
| 1496 | (errmsg("process %d acquired %s on %s after %ld.%03d ms" , |
| 1497 | MyProcPid, modename, buf.data, msecs, usecs))); |
| 1498 | else |
| 1499 | { |
| 1500 | Assert(myWaitStatus == STATUS_ERROR); |
| 1501 | |
| 1502 | /* |
| 1503 | * Currently, the deadlock checker always kicks its own |
| 1504 | * process, which means that we'll only see STATUS_ERROR when |
| 1505 | * deadlock_state == DS_HARD_DEADLOCK, and there's no need to |
| 1506 | * print redundant messages. But for completeness and |
| 1507 | * future-proofing, print a message if it looks like someone |
| 1508 | * else kicked us off the lock. |
| 1509 | */ |
| 1510 | if (deadlock_state != DS_HARD_DEADLOCK) |
| 1511 | ereport(LOG, |
| 1512 | (errmsg("process %d failed to acquire %s on %s after %ld.%03d ms" , |
| 1513 | MyProcPid, modename, buf.data, msecs, usecs), |
| 1514 | (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.", |
| 1515 | "Processes holding the lock: %s. Wait queue: %s.", |
| 1516 | lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data)))); |
| 1517 | } |
| 1518 | |
| 1519 | /* |
| 1520 | * At this point we might still need to wait for the lock. Reset |
| 1521 | * state so we don't print the above messages again. |
| 1522 | */ |
| 1523 | deadlock_state = DS_NO_DEADLOCK; |
| 1524 | |
| 1525 | pfree(buf.data); |
| 1526 | pfree(lock_holders_sbuf.data); |
| 1527 | pfree(lock_waiters_sbuf.data); |
| 1528 | } |
| 1529 | } while (myWaitStatus == STATUS_WAITING); |
| 1530 | |
| 1531 | /* |
| 1532 | * Disable the timers, if they are still running. As in LockErrorCleanup, |
| 1533 | * we must preserve the LOCK_TIMEOUT indicator flag: if a lock timeout has |
| 1534 | * already caused QueryCancelPending to become set, we want the cancel to |
| 1535 | * be reported as a lock timeout, not a user cancel. |
| 1536 | */ |
| 1537 | if (!InHotStandby) |
| 1538 | { |
| 1539 | if (LockTimeout > 0) |
| 1540 | { |
| 1541 | DisableTimeoutParams timeouts[2]; |
| 1542 | |
| 1543 | timeouts[0].id = DEADLOCK_TIMEOUT; |
| 1544 | timeouts[0].keep_indicator = false; |
| 1545 | timeouts[1].id = LOCK_TIMEOUT; |
| 1546 | timeouts[1].keep_indicator = true; |
| 1547 | disable_timeouts(timeouts, 2); |
| 1548 | } |
| 1549 | else |
| 1550 | disable_timeout(DEADLOCK_TIMEOUT, false); |
| 1551 | } |
| 1552 | |
| 1553 | /* |
| 1554 | * Re-acquire the lock table's partition lock. We have to do this to hold |
| 1555 | * off cancel/die interrupts before we can mess with lockAwaited (else we |
| 1556 | * might have a missed or duplicated locallock update). |
| 1557 | */ |
| 1558 | LWLockAcquire(partitionLock, LW_EXCLUSIVE); |
| 1559 | |
| 1560 | /* |
| 1561 | * We no longer want LockErrorCleanup to do anything. |
| 1562 | */ |
| 1563 | lockAwaited = NULL; |
| 1564 | |
| 1565 | /* |
| 1566 | * If we got the lock, be sure to remember it in the locallock table. |
| 1567 | */ |
| 1568 | if (MyProc->waitStatus == STATUS_OK) |
| 1569 | GrantAwaitedLock(); |
| 1570 | |
| 1571 | /* |
| 1572 | * We don't have to do anything else, because the awaker did all the |
| 1573 | * necessary update of the lock table and MyProc. |
| 1574 | */ |
| 1575 | return MyProc->waitStatus; |
| 1576 | } |
| 1577 | |
| 1578 | |
| 1579 | /* |
| 1580 | * ProcWakeup -- wake up a process by setting its latch. |
| 1581 | * |
| 1582 | * Also remove the process from the wait queue and set its links invalid. |
| 1583 | * RETURN: the next process in the wait queue. |
| 1584 | * |
| 1585 | * The appropriate lock partition lock must be held by caller. |
| 1586 | * |
| 1587 | * XXX: presently, this code is only used for the "success" case, and only |
| 1588 | * works correctly for that case. To clean up in the failure case, we would |
| 1589 | * need to twiddle the lock's request counts too --- see RemoveFromWaitQueue. |
| 1590 | * Hence, in practice the waitStatus parameter must be STATUS_OK. |
| 1591 | */ |
| 1592 | PGPROC * |
| 1593 | ProcWakeup(PGPROC *proc, int waitStatus) |
| 1594 | { |
| 1595 | PGPROC *retProc; |
| 1596 | |
| 1597 | /* Proc should be sleeping ... */ |
| 1598 | if (proc->links.prev == NULL || |
| 1599 | proc->links.next == NULL) |
| 1600 | return NULL; |
| 1601 | Assert(proc->waitStatus == STATUS_WAITING); |
| 1602 | |
| 1603 | /* Save next process before we zap the list link */ |
| 1604 | retProc = (PGPROC *) proc->links.next; |
| 1605 | |
| 1606 | /* Remove process from wait queue */ |
| 1607 | SHMQueueDelete(&(proc->links)); |
| 1608 | (proc->waitLock->waitProcs.size)--; |
| 1609 | |
| 1610 | /* Clean up process' state and pass it the ok/fail signal */ |
| 1611 | proc->waitLock = NULL; |
| 1612 | proc->waitProcLock = NULL; |
| 1613 | proc->waitStatus = waitStatus; |
| 1614 | |
| 1615 | /* And awaken it */ |
| 1616 | SetLatch(&proc->procLatch); |
| 1617 | |
| 1618 | return retProc; |
| 1619 | } |
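| | |
| | /* |
| | * Illustrative sketch (not compiled here): the waiter side of this |
| | * handoff, in ProcSleep above, spins on its latch roughly like |
| | * |
| | *	do { |
| | *		WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, 0, ...); |
| | *		ResetLatch(MyLatch); |
| | *		myWaitStatus = *((volatile int *) &MyProc->waitStatus); |
| | *	} while (myWaitStatus == STATUS_WAITING); |
| | * |
| | * so the SetLatch() above is what lets that loop observe the waitStatus |
| | * we just assigned. |
| | */ |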
| 1620 | |
| 1621 | /* |
| 1622 | * ProcLockWakeup -- routine for waking up processes when a lock is |
| 1623 | * released (or a prior waiter is aborted). Scan all waiters |
| 1624 | * for lock, waken any that are no longer blocked. |
| 1625 | * |
| 1626 | * The appropriate lock partition lock must be held by caller. |
| 1627 | */ |
| 1628 | void |
| 1629 | ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock) |
| 1630 | { |
| 1631 | PROC_QUEUE *waitQueue = &(lock->waitProcs); |
| 1632 | int queue_size = waitQueue->size; |
| 1633 | PGPROC *proc; |
| 1634 | LOCKMASK aheadRequests = 0; |
| 1635 | |
| 1636 | Assert(queue_size >= 0); |
| 1637 | |
| 1638 | if (queue_size == 0) |
| 1639 | return; |
| 1640 | |
| 1641 | proc = (PGPROC *) waitQueue->links.next; |
| 1642 | |
| 1643 | while (queue_size-- > 0) |
| 1644 | { |
| 1645 | LOCKMODE lockmode = proc->waitLockMode; |
| 1646 | |
| 1647 | /* |
| 1648 | * Waken if (a) doesn't conflict with requests of earlier waiters, and |
| 1649 | * (b) doesn't conflict with already-held locks. |
| 1650 | */ |
| 1651 | if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 && |
| 1652 | LockCheckConflicts(lockMethodTable, |
| 1653 | lockmode, |
| 1654 | lock, |
| 1655 | proc->waitProcLock) == STATUS_OK) |
| 1656 | { |
| 1657 | /* OK to waken */ |
| 1658 | GrantLock(lock, proc->waitProcLock, lockmode); |
| 1659 | proc = ProcWakeup(proc, STATUS_OK); |
| 1660 | |
| 1661 | /* |
| 1662 | * ProcWakeup removes proc from the lock's waiting process queue |
| 1663 | * and returns the next proc in chain; don't use proc's next-link, |
| 1664 | * because it's been cleared. |
| 1665 | */ |
| 1666 | } |
| 1667 | else |
| 1668 | { |
| 1669 | /* |
| 1670 | * Cannot wake this process. Remember its request for later checks. |
| 1671 | */ |
| 1672 | aheadRequests |= LOCKBIT_ON(lockmode); |
| 1673 | proc = (PGPROC *) proc->links.next; |
| 1674 | } |
| 1675 | } |
| 1676 | |
| 1677 | Assert(waitQueue->size >= 0); |
| 1678 | } |
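| | |
| | /* |
| | * Worked example (illustrative): suppose the queue holds, in order, a |
| | * waiter for AccessExclusiveLock and a waiter for AccessShareLock, and |
| | * the lock is still held by someone in AccessShareLock mode. The first |
| | * waiter conflicts with the held lock, so its mode's bit goes into |
| | * aheadRequests. The second waiter does not conflict with the held |
| | * lock, but conflictTab[AccessShareLock] & aheadRequests is nonzero |
| | * because AccessExclusiveLock conflicts with every mode, so it is not |
| | * awoken either; later arrivals thus cannot jump the queue. |
| | */ |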
| 1679 | |
| 1680 | /* |
| 1681 | * CheckDeadLock |
| 1682 | * |
| 1683 | * We only get to this routine if DEADLOCK_TIMEOUT fired while waiting for a |
| 1684 | * lock to be released by some other process. Check if there's a deadlock; if |
| 1685 | * not, just return. (But signal ProcSleep to log a message if |
| 1686 | * log_lock_waits is true.) If we have a real deadlock, remove ourselves from |
| 1687 | * the lock's wait queue and signal an error to ProcSleep. |
| 1688 | */ |
| 1689 | static void |
| 1690 | CheckDeadLock(void) |
| 1691 | { |
| 1692 | int i; |
| 1693 | |
| 1694 | /* |
| 1695 | * Acquire exclusive lock on the entire shared lock data structures. Must |
| 1696 | * grab LWLocks in partition-number order to avoid LWLock deadlock. |
| 1697 | * |
| 1698 | * Note that the deadlock check interrupt had better not be enabled |
| 1699 | * anywhere that this process itself holds lock partition locks, else this |
| 1700 | * will wait forever. Also note that LWLockAcquire creates a critical |
| 1701 | * section, so that this routine cannot be interrupted by cancel/die |
| 1702 | * interrupts. |
| 1703 | */ |
| 1704 | for (i = 0; i < NUM_LOCK_PARTITIONS; i++) |
| 1705 | LWLockAcquire(LockHashPartitionLockByIndex(i), LW_EXCLUSIVE); |
| 1706 | |
| 1707 | /* |
| 1708 | * Check to see if we've been awoken by anyone in the interim. |
| 1709 | * |
| 1710 | * If we have, we can return and resume our transaction -- happy day. |
| 1711 | * Before we are awoken, the process releasing the lock grants it to us, so |
| 1712 | * we know that we don't have to wait anymore. |
| 1713 | * |
| 1714 | * We check by looking to see if we've been unlinked from the wait queue. |
| 1715 | * This is safe because we hold the lock partition lock. |
| 1716 | */ |
| 1717 | if (MyProc->links.prev == NULL || |
| 1718 | MyProc->links.next == NULL) |
| 1719 | goto check_done; |
| 1720 | |
| 1721 | #ifdef LOCK_DEBUG |
| 1722 | if (Debug_deadlocks) |
| 1723 | DumpAllLocks(); |
| 1724 | #endif |
| 1725 | |
| 1726 | /* Run the deadlock check, and set deadlock_state for use by ProcSleep */ |
| 1727 | deadlock_state = DeadLockCheck(MyProc); |
| 1728 | |
| 1729 | if (deadlock_state == DS_HARD_DEADLOCK) |
| 1730 | { |
| 1731 | /* |
| 1732 | * Oops. We have a deadlock. |
| 1733 | * |
| 1734 | * Get this process out of wait state. (Note: we could do this more |
| 1735 | * efficiently by relying on lockAwaited, but use this coding to |
| 1736 | * preserve the flexibility to kill some other transaction than the |
| 1737 | * one detecting the deadlock.) |
| 1738 | * |
| 1739 | * RemoveFromWaitQueue sets MyProc->waitStatus to STATUS_ERROR, so |
| 1740 | * ProcSleep will report an error after we return from the signal |
| 1741 | * handler. |
| 1742 | */ |
| 1743 | Assert(MyProc->waitLock != NULL); |
| 1744 | RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag))); |
| 1745 | |
| 1746 | /* |
| 1747 | * We're done here. Transaction abort caused by the error that |
| 1748 | * ProcSleep will raise will cause any other locks we hold to be |
| 1749 | * released, thus allowing other processes to wake up; we don't need |
| 1750 | * to do that here. NOTE: an exception is that releasing locks we |
| 1751 | * hold doesn't consider the possibility of waiters that were blocked |
| 1752 | * behind us on the lock we just failed to get, and might now be |
| 1753 | * wakable because we're not in front of them anymore. However, |
| 1754 | * RemoveFromWaitQueue took care of waking up any such processes. |
| 1755 | */ |
| 1756 | } |
| 1757 | |
| 1758 | /* |
| 1759 | * And release locks. We do this in reverse order for two reasons: (1) |
| 1760 | * Anyone else who needs more than one of the locks will be trying to lock |
| 1761 | * them in increasing order; we don't want to release the other process |
| 1762 | * until it can get all the locks it needs. (2) This avoids O(N^2) |
| 1763 | * behavior inside LWLockRelease. |
| 1764 | */ |
| 1765 | check_done: |
| 1766 | for (i = NUM_LOCK_PARTITIONS; --i >= 0;) |
| 1767 | LWLockRelease(LockHashPartitionLockByIndex(i)); |
| 1768 | } |
| 1769 | |
| 1770 | /* |
| 1771 | * CheckDeadLockAlert - Handle the expiry of deadlock_timeout. |
| 1772 | * |
| 1773 | * NB: Runs inside a signal handler, be careful. |
| 1774 | */ |
| 1775 | void |
| 1776 | CheckDeadLockAlert(void) |
| 1777 | { |
| 1778 | int save_errno = errno; |
| 1779 | |
| 1780 | got_deadlock_timeout = true; |
| 1781 | |
| 1782 | /* |
| 1783 | * We have to set the latch again even if handle_sig_alarm already did, |
| 1784 | * because got_deadlock_timeout was not yet set at that point. It's unlikely |
| 1785 | * this would ever be a problem, but setting an already-set latch is cheap. |
| 1786 | */ |
| 1787 | SetLatch(MyLatch); |
| 1788 | errno = save_errno; |
| 1789 | } |
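| | |
| | /* |
| | * Sketch (illustrative): got_deadlock_timeout is consumed outside the |
| | * signal handler, in ProcSleep's wait loop, roughly as |
| | * |
| | *	if (got_deadlock_timeout) |
| | *	{ |
| | *		CheckDeadLock(); |
| | *		got_deadlock_timeout = false; |
| | *	} |
| | * |
| | * which is why this handler only sets a flag and the latch. |
| | */ |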
| 1790 | |
| 1791 | /* |
| 1792 | * ProcWaitForSignal - wait for a signal from another backend. |
| 1793 | * |
| 1794 | * As this uses the generic process latch the caller has to be robust against |
| 1795 | * unrelated wakeups: Always check that the desired state has occurred, and |
| 1796 | * wait again if not. |
| 1797 | */ |
| 1798 | void |
| 1799 | ProcWaitForSignal(uint32 wait_event_info) |
| 1800 | { |
| 1801 | (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, 0, |
| 1802 | wait_event_info); |
| 1803 | ResetLatch(MyLatch); |
| 1804 | CHECK_FOR_INTERRUPTS(); |
| 1805 | } |
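| | |
| | /* |
| | * Usage sketch (illustrative; condition_satisfied() is a hypothetical |
| | * predicate): because wakeups can be spurious, a caller loops, e.g. |
| | * |
| | *	while (!condition_satisfied()) |
| | *		ProcWaitForSignal(PG_WAIT_BUFFER_PIN); |
| | * |
| | * much as the buffer manager does while waiting for a cleanup lock. |
| | */ |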
| 1806 | |
| 1807 | /* |
| 1808 | * ProcSendSignal - send a signal to a backend identified by PID |
| 1809 | */ |
| 1810 | void |
| 1811 | ProcSendSignal(int pid) |
| 1812 | { |
| 1813 | PGPROC *proc = NULL; |
| 1814 | |
| 1815 | if (RecoveryInProgress()) |
| 1816 | { |
| 1817 | SpinLockAcquire(ProcStructLock); |
| 1818 | |
| 1819 | /* |
| 1820 | * Check to see whether it is the Startup process we wish to signal. |
| 1821 | * This call is made by the buffer manager when it wishes to wake up a |
| 1822 | * process that has been waiting for a pin, so that it can obtain a |
| 1823 | * cleanup lock using LockBufferForCleanup(). Startup is not a normal |
| 1824 | * backend, so BackendPidGetProc() will not return any pid at all. So |
| 1825 | * we remember the information for this special case. |
| 1826 | */ |
| 1827 | if (pid == ProcGlobal->startupProcPid) |
| 1828 | proc = ProcGlobal->startupProc; |
| 1829 | |
| 1830 | SpinLockRelease(ProcStructLock); |
| 1831 | } |
| 1832 | |
| 1833 | if (proc == NULL) |
| 1834 | proc = BackendPidGetProc(pid); |
| 1835 | |
| 1836 | if (proc != NULL) |
| 1837 | { |
| 1838 | SetLatch(&proc->procLatch); |
| 1839 | } |
| 1840 | } |
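| | |
| | /* |
| | * Illustrative pairing: the buffer manager wakes a cleanup-lock waiter |
| | * with ProcSendSignal() once the blocking pin is dropped, while the |
| | * waiter sits in a ProcWaitForSignal() loop like the sketch above. |
| | */ |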
| 1841 | |
| 1842 | /* |
| 1843 | * BecomeLockGroupLeader - designate process as lock group leader |
| 1844 | * |
| 1845 | * Once this function has returned, other processes can join the lock group |
| 1846 | * by calling BecomeLockGroupMember. |
| 1847 | */ |
| 1848 | void |
| 1849 | BecomeLockGroupLeader(void) |
| 1850 | { |
| 1851 | LWLock *leader_lwlock; |
| 1852 | |
| 1853 | /* If we already did it, we don't need to do it again. */ |
| 1854 | if (MyProc->lockGroupLeader == MyProc) |
| 1855 | return; |
| 1856 | |
| 1857 | /* We had better not be a follower. */ |
| 1858 | Assert(MyProc->lockGroupLeader == NULL); |
| 1859 | |
| 1860 | /* Create single-member group, containing only ourselves. */ |
| 1861 | leader_lwlock = LockHashPartitionLockByProc(MyProc); |
| 1862 | LWLockAcquire(leader_lwlock, LW_EXCLUSIVE); |
| 1863 | MyProc->lockGroupLeader = MyProc; |
| 1864 | dlist_push_head(&MyProc->lockGroupMembers, &MyProc->lockGroupLink); |
| 1865 | LWLockRelease(leader_lwlock); |
| 1866 | } |
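| | |
| | /* |
| | * Usage sketch (illustrative, modeled on parallel query): the leader |
| | * calls BecomeLockGroupLeader() before launching workers and advertises |
| | * its PGPROC pointer and PID to them, e.g. via shared memory; each |
| | * worker then joins with BecomeLockGroupMember() and must cope with a |
| | * false result, meaning the leader already exited. |
| | */ |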
| 1867 | |
| 1868 | /* |
| 1869 | * BecomeLockGroupMember - designate process as lock group member |
| 1870 | * |
| 1871 | * This is pretty straightforward except for the possibility that the leader |
| 1872 | * whose group we're trying to join might exit before we manage to do so; |
| 1873 | * and the PGPROC might get recycled for an unrelated process. To avoid |
| 1874 | * that, we require the caller to pass the PID of the intended PGPROC as |
| 1875 | * an interlock. Returns true if we successfully join the intended lock |
| 1876 | * group, and false if not. |
| 1877 | */ |
| 1878 | bool |
| 1879 | BecomeLockGroupMember(PGPROC *leader, int pid) |
| 1880 | { |
| 1881 | LWLock *leader_lwlock; |
| 1882 | bool ok = false; |
| 1883 | |
| 1884 | /* Group leader can't become member of group */ |
| 1885 | Assert(MyProc != leader); |
| 1886 | |
| 1887 | /* Can't already be a member of a group */ |
| 1888 | Assert(MyProc->lockGroupLeader == NULL); |
| 1889 | |
| 1890 | /* PID must be valid. */ |
| 1891 | Assert(pid != 0); |
| 1892 | |
| 1893 | /* |
| 1894 | * Get lock protecting the group fields. Note LockHashPartitionLockByProc |
| 1895 | * accesses leader->pgprocno in a PGPROC that might be free. This is safe |
| 1896 | * because all PGPROCs' pgprocno fields are set during shared memory |
| 1897 | * initialization and never change thereafter; so we will acquire the |
| 1898 | * correct lock even if the leader PGPROC is in process of being recycled. |
| 1899 | */ |
| 1900 | leader_lwlock = LockHashPartitionLockByProc(leader); |
| 1901 | LWLockAcquire(leader_lwlock, LW_EXCLUSIVE); |
| 1902 | |
| 1903 | /* Is this the leader we're looking for? */ |
| 1904 | if (leader->pid == pid && leader->lockGroupLeader == leader) |
| 1905 | { |
| 1906 | /* OK, join the group */ |
| 1907 | ok = true; |
| 1908 | MyProc->lockGroupLeader = leader; |
| 1909 | dlist_push_tail(&leader->lockGroupMembers, &MyProc->lockGroupLink); |
| 1910 | } |
| 1911 | LWLockRelease(leader_lwlock); |
| 1912 | |
| 1913 | return ok; |
| 1914 | } |
| 1915 | |
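| | /* |
| | * Illustrative worker-side call (leader_pgproc and leader_pid stand for |
| | * hypothetical values the leader advertised): |
| | * |
| | *	if (!BecomeLockGroupMember(leader_pgproc, leader_pid)) |
| | *		proc_exit(0); |
| | * |
| | * i.e. if the leader is already gone, the worker just exits quietly. |
| | */ |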