| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * standby.c |
| 4 | * Misc functions used in Hot Standby mode. |
| 5 | * |
| 6 | * All functions for handling RM_STANDBY_ID, which relate to |
| 7 | * AccessExclusiveLocks and starting snapshots for Hot Standby mode. |
| 8 | * Plus conflict recovery processing. |
| 9 | * |
| 10 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 11 | * Portions Copyright (c) 1994, Regents of the University of California |
| 12 | * |
| 13 | * IDENTIFICATION |
| 14 | * src/backend/storage/ipc/standby.c |
| 15 | * |
| 16 | *------------------------------------------------------------------------- |
| 17 | */ |
| 18 | #include "postgres.h" |
| 19 | #include "access/transam.h" |
| 20 | #include "access/twophase.h" |
| 21 | #include "access/xact.h" |
| 22 | #include "access/xlog.h" |
| 23 | #include "access/xloginsert.h" |
| 24 | #include "miscadmin.h" |
| 25 | #include "pgstat.h" |
| 26 | #include "storage/bufmgr.h" |
| 27 | #include "storage/lmgr.h" |
| 28 | #include "storage/proc.h" |
| 29 | #include "storage/procarray.h" |
| 30 | #include "storage/sinvaladt.h" |
| 31 | #include "storage/standby.h" |
| 32 | #include "utils/hsearch.h" |
| 33 | #include "utils/memutils.h" |
| 34 | #include "utils/ps_status.h" |
| 35 | #include "utils/timeout.h" |
| 36 | #include "utils/timestamp.h" |
| 37 | |
/*
 * User-settable GUC parameters.
 *
 * max_standby_archive_delay and max_standby_streaming_delay are in
 * milliseconds; a value of -1 means "wait forever" (see
 * GetStandbyLimitTime() below).
 */
int vacuum_defer_cleanup_age;
int max_standby_archive_delay = 30 * 1000;
int max_standby_streaming_delay = 30 * 1000;

/*
 * Hash table, keyed by xid, tracking the AccessExclusiveLocks the Startup
 * process holds on behalf of transactions being replayed.  Created in
 * InitRecoveryTransactionEnvironment(), destroyed in
 * ShutdownRecoveryTransactionEnvironment().
 */
static HTAB *RecoveryLockLists;

/* Forward declarations of local routines */
static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
												   ProcSignalReason reason);
static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);

/*
 * Keep track of all the locks owned by a given transaction.
 *
 * One RecoveryLockListsEntry per xid; 'locks' is a List of xl_standby_lock
 * entries (the WAL record format doubles as our bookkeeping format).
 */
typedef struct RecoveryLockListsEntry
{
	TransactionId xid;			/* hash key: the (sub)transaction id */
	List	   *locks;			/* list of xl_standby_lock * */
} RecoveryLockListsEntry;
| 59 | |
| 60 | /* |
| 61 | * InitRecoveryTransactionEnvironment |
| 62 | * Initialize tracking of in-progress transactions in master |
| 63 | * |
| 64 | * We need to issue shared invalidations and hold locks. Holding locks |
| 65 | * means others may want to wait on us, so we need to make a lock table |
| 66 | * vxact entry like a real transaction. We could create and delete |
| 67 | * lock table entries for each transaction but its simpler just to create |
| 68 | * one permanent entry and leave it there all the time. Locks are then |
| 69 | * acquired and released as needed. Yes, this means you can see the |
| 70 | * Startup process in pg_locks once we have run this. |
| 71 | */ |
| 72 | void |
| 73 | InitRecoveryTransactionEnvironment(void) |
| 74 | { |
| 75 | VirtualTransactionId vxid; |
| 76 | HASHCTL hash_ctl; |
| 77 | |
| 78 | /* |
| 79 | * Initialize the hash table for tracking the list of locks held by each |
| 80 | * transaction. |
| 81 | */ |
| 82 | memset(&hash_ctl, 0, sizeof(hash_ctl)); |
| 83 | hash_ctl.keysize = sizeof(TransactionId); |
| 84 | hash_ctl.entrysize = sizeof(RecoveryLockListsEntry); |
| 85 | RecoveryLockLists = hash_create("RecoveryLockLists" , |
| 86 | 64, |
| 87 | &hash_ctl, |
| 88 | HASH_ELEM | HASH_BLOBS); |
| 89 | |
| 90 | /* |
| 91 | * Initialize shared invalidation management for Startup process, being |
| 92 | * careful to register ourselves as a sendOnly process so we don't need to |
| 93 | * read messages, nor will we get signalled when the queue starts filling |
| 94 | * up. |
| 95 | */ |
| 96 | SharedInvalBackendInit(true); |
| 97 | |
| 98 | /* |
| 99 | * Lock a virtual transaction id for Startup process. |
| 100 | * |
| 101 | * We need to do GetNextLocalTransactionId() because |
| 102 | * SharedInvalBackendInit() leaves localTransactionid invalid and the lock |
| 103 | * manager doesn't like that at all. |
| 104 | * |
| 105 | * Note that we don't need to run XactLockTableInsert() because nobody |
| 106 | * needs to wait on xids. That sounds a little strange, but table locks |
| 107 | * are held by vxids and row level locks are held by xids. All queries |
| 108 | * hold AccessShareLocks so never block while we write or lock new rows. |
| 109 | */ |
| 110 | vxid.backendId = MyBackendId; |
| 111 | vxid.localTransactionId = GetNextLocalTransactionId(); |
| 112 | VirtualXactLockTableInsert(vxid); |
| 113 | |
| 114 | standbyState = STANDBY_INITIALIZED; |
| 115 | } |
| 116 | |
| 117 | /* |
| 118 | * ShutdownRecoveryTransactionEnvironment |
| 119 | * Shut down transaction tracking |
| 120 | * |
| 121 | * Prepare to switch from hot standby mode to normal operation. Shut down |
| 122 | * recovery-time transaction tracking. |
| 123 | */ |
| 124 | void |
| 125 | ShutdownRecoveryTransactionEnvironment(void) |
| 126 | { |
| 127 | /* Mark all tracked in-progress transactions as finished. */ |
| 128 | ExpireAllKnownAssignedTransactionIds(); |
| 129 | |
| 130 | /* Release all locks the tracked transactions were holding */ |
| 131 | StandbyReleaseAllLocks(); |
| 132 | |
| 133 | /* Destroy the hash table of locks. */ |
| 134 | hash_destroy(RecoveryLockLists); |
| 135 | RecoveryLockLists = NULL; |
| 136 | |
| 137 | /* Cleanup our VirtualTransaction */ |
| 138 | VirtualXactLockTableCleanup(); |
| 139 | } |
| 140 | |
| 141 | |
| 142 | /* |
| 143 | * ----------------------------------------------------- |
| 144 | * Standby wait timers and backend cancel logic |
| 145 | * ----------------------------------------------------- |
| 146 | */ |
| 147 | |
| 148 | /* |
| 149 | * Determine the cutoff time at which we want to start canceling conflicting |
| 150 | * transactions. Returns zero (a time safely in the past) if we are willing |
| 151 | * to wait forever. |
| 152 | */ |
| 153 | static TimestampTz |
| 154 | GetStandbyLimitTime(void) |
| 155 | { |
| 156 | TimestampTz rtime; |
| 157 | bool fromStream; |
| 158 | |
| 159 | /* |
| 160 | * The cutoff time is the last WAL data receipt time plus the appropriate |
| 161 | * delay variable. Delay of -1 means wait forever. |
| 162 | */ |
| 163 | GetXLogReceiptTime(&rtime, &fromStream); |
| 164 | if (fromStream) |
| 165 | { |
| 166 | if (max_standby_streaming_delay < 0) |
| 167 | return 0; /* wait forever */ |
| 168 | return TimestampTzPlusMilliseconds(rtime, max_standby_streaming_delay); |
| 169 | } |
| 170 | else |
| 171 | { |
| 172 | if (max_standby_archive_delay < 0) |
| 173 | return 0; /* wait forever */ |
| 174 | return TimestampTzPlusMilliseconds(rtime, max_standby_archive_delay); |
| 175 | } |
| 176 | } |
| 177 | |
| 178 | #define STANDBY_INITIAL_WAIT_US 1000 |
| 179 | static int standbyWait_us = STANDBY_INITIAL_WAIT_US; |
| 180 | |
| 181 | /* |
| 182 | * Standby wait logic for ResolveRecoveryConflictWithVirtualXIDs. |
| 183 | * We wait here for a while then return. If we decide we can't wait any |
| 184 | * more then we return true, if we can wait some more return false. |
| 185 | */ |
| 186 | static bool |
| 187 | WaitExceedsMaxStandbyDelay(void) |
| 188 | { |
| 189 | TimestampTz ltime; |
| 190 | |
| 191 | CHECK_FOR_INTERRUPTS(); |
| 192 | |
| 193 | /* Are we past the limit time? */ |
| 194 | ltime = GetStandbyLimitTime(); |
| 195 | if (ltime && GetCurrentTimestamp() >= ltime) |
| 196 | return true; |
| 197 | |
| 198 | /* |
| 199 | * Sleep a bit (this is essential to avoid busy-waiting). |
| 200 | */ |
| 201 | pg_usleep(standbyWait_us); |
| 202 | |
| 203 | /* |
| 204 | * Progressively increase the sleep times, but not to more than 1s, since |
| 205 | * pg_usleep isn't interruptible on some platforms. |
| 206 | */ |
| 207 | standbyWait_us *= 2; |
| 208 | if (standbyWait_us > 1000000) |
| 209 | standbyWait_us = 1000000; |
| 210 | |
| 211 | return false; |
| 212 | } |
| 213 | |
| 214 | /* |
| 215 | * This is the main executioner for any query backend that conflicts with |
| 216 | * recovery processing. Judgement has already been passed on it within |
| 217 | * a specific rmgr. Here we just issue the orders to the procs. The procs |
| 218 | * then throw the required error as instructed. |
| 219 | */ |
| 220 | static void |
| 221 | ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, |
| 222 | ProcSignalReason reason) |
| 223 | { |
| 224 | TimestampTz waitStart; |
| 225 | char *new_status; |
| 226 | |
| 227 | /* Fast exit, to avoid a kernel call if there's no work to be done. */ |
| 228 | if (!VirtualTransactionIdIsValid(*waitlist)) |
| 229 | return; |
| 230 | |
| 231 | waitStart = GetCurrentTimestamp(); |
| 232 | new_status = NULL; /* we haven't changed the ps display */ |
| 233 | |
| 234 | while (VirtualTransactionIdIsValid(*waitlist)) |
| 235 | { |
| 236 | /* reset standbyWait_us for each xact we wait for */ |
| 237 | standbyWait_us = STANDBY_INITIAL_WAIT_US; |
| 238 | |
| 239 | /* wait until the virtual xid is gone */ |
| 240 | while (!VirtualXactLock(*waitlist, false)) |
| 241 | { |
| 242 | /* |
| 243 | * Report via ps if we have been waiting for more than 500 msec |
| 244 | * (should that be configurable?) |
| 245 | */ |
| 246 | if (update_process_title && new_status == NULL && |
| 247 | TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(), |
| 248 | 500)) |
| 249 | { |
| 250 | const char *old_status; |
| 251 | int len; |
| 252 | |
| 253 | old_status = get_ps_display(&len); |
| 254 | new_status = (char *) palloc(len + 8 + 1); |
| 255 | memcpy(new_status, old_status, len); |
| 256 | strcpy(new_status + len, " waiting" ); |
| 257 | set_ps_display(new_status, false); |
| 258 | new_status[len] = '\0'; /* truncate off " waiting" */ |
| 259 | } |
| 260 | |
| 261 | /* Is it time to kill it? */ |
| 262 | if (WaitExceedsMaxStandbyDelay()) |
| 263 | { |
| 264 | pid_t pid; |
| 265 | |
| 266 | /* |
| 267 | * Now find out who to throw out of the balloon. |
| 268 | */ |
| 269 | Assert(VirtualTransactionIdIsValid(*waitlist)); |
| 270 | pid = CancelVirtualTransaction(*waitlist, reason); |
| 271 | |
| 272 | /* |
| 273 | * Wait a little bit for it to die so that we avoid flooding |
| 274 | * an unresponsive backend when system is heavily loaded. |
| 275 | */ |
| 276 | if (pid != 0) |
| 277 | pg_usleep(5000L); |
| 278 | } |
| 279 | } |
| 280 | |
| 281 | /* The virtual transaction is gone now, wait for the next one */ |
| 282 | waitlist++; |
| 283 | } |
| 284 | |
| 285 | /* Reset ps display if we changed it */ |
| 286 | if (new_status) |
| 287 | { |
| 288 | set_ps_display(new_status, false); |
| 289 | pfree(new_status); |
| 290 | } |
| 291 | } |
| 292 | |
| 293 | void |
| 294 | ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode node) |
| 295 | { |
| 296 | VirtualTransactionId *backends; |
| 297 | |
| 298 | /* |
| 299 | * If we get passed InvalidTransactionId then we are a little surprised, |
| 300 | * but it is theoretically possible in normal running. It also happens |
| 301 | * when replaying already applied WAL records after a standby crash or |
| 302 | * restart, or when replaying an XLOG_HEAP2_VISIBLE record that marks as |
| 303 | * frozen a page which was already all-visible. If latestRemovedXid is |
| 304 | * invalid then there is no conflict. That rule applies across all record |
| 305 | * types that suffer from this conflict. |
| 306 | */ |
| 307 | if (!TransactionIdIsValid(latestRemovedXid)) |
| 308 | return; |
| 309 | |
| 310 | backends = GetConflictingVirtualXIDs(latestRemovedXid, |
| 311 | node.dbNode); |
| 312 | |
| 313 | ResolveRecoveryConflictWithVirtualXIDs(backends, |
| 314 | PROCSIG_RECOVERY_CONFLICT_SNAPSHOT); |
| 315 | } |
| 316 | |
| 317 | void |
| 318 | ResolveRecoveryConflictWithTablespace(Oid tsid) |
| 319 | { |
| 320 | VirtualTransactionId *temp_file_users; |
| 321 | |
| 322 | /* |
| 323 | * Standby users may be currently using this tablespace for their |
| 324 | * temporary files. We only care about current users because |
| 325 | * temp_tablespace parameter will just ignore tablespaces that no longer |
| 326 | * exist. |
| 327 | * |
| 328 | * Ask everybody to cancel their queries immediately so we can ensure no |
| 329 | * temp files remain and we can remove the tablespace. Nuke the entire |
| 330 | * site from orbit, it's the only way to be sure. |
| 331 | * |
| 332 | * XXX: We could work out the pids of active backends using this |
| 333 | * tablespace by examining the temp filenames in the directory. We would |
| 334 | * then convert the pids into VirtualXIDs before attempting to cancel |
| 335 | * them. |
| 336 | * |
| 337 | * We don't wait for commit because drop tablespace is non-transactional. |
| 338 | */ |
| 339 | temp_file_users = GetConflictingVirtualXIDs(InvalidTransactionId, |
| 340 | InvalidOid); |
| 341 | ResolveRecoveryConflictWithVirtualXIDs(temp_file_users, |
| 342 | PROCSIG_RECOVERY_CONFLICT_TABLESPACE); |
| 343 | } |
| 344 | |
/*
 * ResolveRecoveryConflictWithDatabase
 *		Kick every backend off database dbid, looping until none remain.
 */
void
ResolveRecoveryConflictWithDatabase(Oid dbid)
{
	/*
	 * We don't do ResolveRecoveryConflictWithVirtualXIDs() here since that
	 * only waits for transactions and completely idle sessions would block
	 * us. This is rare enough that we do this as simply as possible: no wait,
	 * just force them off immediately.
	 *
	 * No locking is required here because we already acquired
	 * AccessExclusiveLock. Anybody trying to connect while we do this will
	 * block during InitPostgres() and then disconnect when they see the
	 * database has been removed.
	 */
	while (CountDBBackends(dbid) > 0)
	{
		CancelDBBackends(dbid, PROCSIG_RECOVERY_CONFLICT_DATABASE, true);

		/*
		 * Wait awhile for them to die so that we avoid flooding an
		 * unresponsive backend when system is heavily loaded.
		 */
		pg_usleep(10000);
	}
}
| 370 | |
| 371 | /* |
| 372 | * ResolveRecoveryConflictWithLock is called from ProcSleep() |
| 373 | * to resolve conflicts with other backends holding relation locks. |
| 374 | * |
| 375 | * The WaitLatch sleep normally done in ProcSleep() |
| 376 | * (when not InHotStandby) is performed here, for code clarity. |
| 377 | * |
| 378 | * We either resolve conflicts immediately or set a timeout to wake us at |
| 379 | * the limit of our patience. |
| 380 | * |
| 381 | * Resolve conflicts by canceling to all backends holding a conflicting |
| 382 | * lock. As we are already queued to be granted the lock, no new lock |
| 383 | * requests conflicting with ours will be granted in the meantime. |
| 384 | * |
| 385 | * Deadlocks involving the Startup process and an ordinary backend process |
| 386 | * will be detected by the deadlock detector within the ordinary backend. |
| 387 | */ |
| 388 | void |
| 389 | ResolveRecoveryConflictWithLock(LOCKTAG locktag) |
| 390 | { |
| 391 | TimestampTz ltime; |
| 392 | |
| 393 | Assert(InHotStandby); |
| 394 | |
| 395 | ltime = GetStandbyLimitTime(); |
| 396 | |
| 397 | if (GetCurrentTimestamp() >= ltime) |
| 398 | { |
| 399 | /* |
| 400 | * We're already behind, so clear a path as quickly as possible. |
| 401 | */ |
| 402 | VirtualTransactionId *backends; |
| 403 | |
| 404 | backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL); |
| 405 | ResolveRecoveryConflictWithVirtualXIDs(backends, |
| 406 | PROCSIG_RECOVERY_CONFLICT_LOCK); |
| 407 | } |
| 408 | else |
| 409 | { |
| 410 | /* |
| 411 | * Wait (or wait again) until ltime |
| 412 | */ |
| 413 | EnableTimeoutParams timeouts[1]; |
| 414 | |
| 415 | timeouts[0].id = STANDBY_LOCK_TIMEOUT; |
| 416 | timeouts[0].type = TMPARAM_AT; |
| 417 | timeouts[0].fin_time = ltime; |
| 418 | enable_timeouts(timeouts, 1); |
| 419 | } |
| 420 | |
| 421 | /* Wait to be signaled by the release of the Relation Lock */ |
| 422 | ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type); |
| 423 | |
| 424 | /* |
| 425 | * Clear any timeout requests established above. We assume here that the |
| 426 | * Startup process doesn't have any other outstanding timeouts than those |
| 427 | * used by this function. If that stops being true, we could cancel the |
| 428 | * timeouts individually, but that'd be slower. |
| 429 | */ |
| 430 | disable_all_timeouts(false); |
| 431 | } |
| 432 | |
| 433 | /* |
| 434 | * ResolveRecoveryConflictWithBufferPin is called from LockBufferForCleanup() |
| 435 | * to resolve conflicts with other backends holding buffer pins. |
| 436 | * |
| 437 | * The ProcWaitForSignal() sleep normally done in LockBufferForCleanup() |
| 438 | * (when not InHotStandby) is performed here, for code clarity. |
| 439 | * |
| 440 | * We either resolve conflicts immediately or set a timeout to wake us at |
| 441 | * the limit of our patience. |
| 442 | * |
| 443 | * Resolve conflicts by sending a PROCSIG signal to all backends to check if |
| 444 | * they hold one of the buffer pins that is blocking Startup process. If so, |
| 445 | * those backends will take an appropriate error action, ERROR or FATAL. |
| 446 | * |
| 447 | * We also must check for deadlocks. Deadlocks occur because if queries |
| 448 | * wait on a lock, that must be behind an AccessExclusiveLock, which can only |
| 449 | * be cleared if the Startup process replays a transaction completion record. |
| 450 | * If Startup process is also waiting then that is a deadlock. The deadlock |
| 451 | * can occur if the query is waiting and then the Startup sleeps, or if |
| 452 | * Startup is sleeping and the query waits on a lock. We protect against |
| 453 | * only the former sequence here, the latter sequence is checked prior to |
| 454 | * the query sleeping, in CheckRecoveryConflictDeadlock(). |
| 455 | * |
| 456 | * Deadlocks are extremely rare, and relatively expensive to check for, |
| 457 | * so we don't do a deadlock check right away ... only if we have had to wait |
| 458 | * at least deadlock_timeout. |
| 459 | */ |
| 460 | void |
| 461 | ResolveRecoveryConflictWithBufferPin(void) |
| 462 | { |
| 463 | TimestampTz ltime; |
| 464 | |
| 465 | Assert(InHotStandby); |
| 466 | |
| 467 | ltime = GetStandbyLimitTime(); |
| 468 | |
| 469 | if (ltime == 0) |
| 470 | { |
| 471 | /* |
| 472 | * We're willing to wait forever for conflicts, so set timeout for |
| 473 | * deadlock check only |
| 474 | */ |
| 475 | enable_timeout_after(STANDBY_DEADLOCK_TIMEOUT, DeadlockTimeout); |
| 476 | } |
| 477 | else if (GetCurrentTimestamp() >= ltime) |
| 478 | { |
| 479 | /* |
| 480 | * We're already behind, so clear a path as quickly as possible. |
| 481 | */ |
| 482 | SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); |
| 483 | } |
| 484 | else |
| 485 | { |
| 486 | /* |
| 487 | * Wake up at ltime, and check for deadlocks as well if we will be |
| 488 | * waiting longer than deadlock_timeout |
| 489 | */ |
| 490 | EnableTimeoutParams timeouts[2]; |
| 491 | |
| 492 | timeouts[0].id = STANDBY_TIMEOUT; |
| 493 | timeouts[0].type = TMPARAM_AT; |
| 494 | timeouts[0].fin_time = ltime; |
| 495 | timeouts[1].id = STANDBY_DEADLOCK_TIMEOUT; |
| 496 | timeouts[1].type = TMPARAM_AFTER; |
| 497 | timeouts[1].delay_ms = DeadlockTimeout; |
| 498 | enable_timeouts(timeouts, 2); |
| 499 | } |
| 500 | |
| 501 | /* Wait to be signaled by UnpinBuffer() */ |
| 502 | ProcWaitForSignal(PG_WAIT_BUFFER_PIN); |
| 503 | |
| 504 | /* |
| 505 | * Clear any timeout requests established above. We assume here that the |
| 506 | * Startup process doesn't have any other timeouts than what this function |
| 507 | * uses. If that stops being true, we could cancel the timeouts |
| 508 | * individually, but that'd be slower. |
| 509 | */ |
| 510 | disable_all_timeouts(false); |
| 511 | } |
| 512 | |
/*
 * SendRecoveryConflictWithBufferPin
 *		Broadcast 'reason' to all backends so each can check whether it
 *		holds the buffer pin the Startup process is waiting on.
 *
 * Used both for the plain buffer-pin conflict and for the startup-process
 * deadlock check; no other reasons are valid here.
 */
static void
SendRecoveryConflictWithBufferPin(ProcSignalReason reason)
{
	Assert(reason == PROCSIG_RECOVERY_CONFLICT_BUFFERPIN ||
		   reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);

	/*
	 * We send signal to all backends to ask them if they are holding the
	 * buffer pin which is delaying the Startup process. We must not set the
	 * conflict flag yet, since most backends will be innocent. Let the
	 * SIGUSR1 handling in each backend decide their own fate.
	 */
	CancelDBBackends(InvalidOid, reason, false);
}
| 527 | |
| 528 | /* |
| 529 | * In Hot Standby perform early deadlock detection. We abort the lock |
| 530 | * wait if we are about to sleep while holding the buffer pin that Startup |
| 531 | * process is waiting for. |
| 532 | * |
| 533 | * Note: this code is pessimistic, because there is no way for it to |
| 534 | * determine whether an actual deadlock condition is present: the lock we |
| 535 | * need to wait for might be unrelated to any held by the Startup process. |
| 536 | * Sooner or later, this mechanism should get ripped out in favor of somehow |
| 537 | * accounting for buffer locks in DeadLockCheck(). However, errors here |
| 538 | * seem to be very low-probability in practice, so for now it's not worth |
| 539 | * the trouble. |
| 540 | */ |
| 541 | void |
| 542 | CheckRecoveryConflictDeadlock(void) |
| 543 | { |
| 544 | Assert(!InRecovery); /* do not call in Startup process */ |
| 545 | |
| 546 | if (!HoldingBufferPinThatDelaysRecovery()) |
| 547 | return; |
| 548 | |
| 549 | /* |
| 550 | * Error message should match ProcessInterrupts() but we avoid calling |
| 551 | * that because we aren't handling an interrupt at this point. Note that |
| 552 | * we only cancel the current transaction here, so if we are in a |
| 553 | * subtransaction and the pin is held by a parent, then the Startup |
| 554 | * process will continue to wait even though we have avoided deadlock. |
| 555 | */ |
| 556 | ereport(ERROR, |
| 557 | (errcode(ERRCODE_T_R_DEADLOCK_DETECTED), |
| 558 | errmsg("canceling statement due to conflict with recovery" ), |
| 559 | errdetail("User transaction caused buffer deadlock with recovery." ))); |
| 560 | } |
| 561 | |
| 562 | |
| 563 | /* -------------------------------- |
| 564 | * timeout handler routines |
| 565 | * -------------------------------- |
| 566 | */ |
| 567 | |
| 568 | /* |
| 569 | * StandbyDeadLockHandler() will be called if STANDBY_DEADLOCK_TIMEOUT |
| 570 | * occurs before STANDBY_TIMEOUT. Send out a request for hot-standby |
| 571 | * backends to check themselves for deadlocks. |
| 572 | */ |
| 573 | void |
| 574 | StandbyDeadLockHandler(void) |
| 575 | { |
| 576 | SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK); |
| 577 | } |
| 578 | |
| 579 | /* |
| 580 | * StandbyTimeoutHandler() will be called if STANDBY_TIMEOUT is exceeded. |
| 581 | * Send out a request to release conflicting buffer pins unconditionally, |
| 582 | * so we can press ahead with applying changes in recovery. |
| 583 | */ |
| 584 | void |
| 585 | StandbyTimeoutHandler(void) |
| 586 | { |
| 587 | /* forget any pending STANDBY_DEADLOCK_TIMEOUT request */ |
| 588 | disable_timeout(STANDBY_DEADLOCK_TIMEOUT, false); |
| 589 | |
| 590 | SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); |
| 591 | } |
| 592 | |
| 593 | /* |
| 594 | * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded. |
| 595 | * This doesn't need to do anything, simply waking up is enough. |
| 596 | */ |
| 597 | void |
| 598 | StandbyLockTimeoutHandler(void) |
| 599 | { |
| 600 | } |
| 601 | |
| 602 | /* |
| 603 | * ----------------------------------------------------- |
| 604 | * Locking in Recovery Mode |
| 605 | * ----------------------------------------------------- |
| 606 | * |
| 607 | * All locks are held by the Startup process using a single virtual |
| 608 | * transaction. This implementation is both simpler and in some senses, |
| 609 | * more correct. The locks held mean "some original transaction held |
| 610 | * this lock, so query access is not allowed at this time". So the Startup |
| 611 | * process is the proxy by which the original locks are implemented. |
| 612 | * |
| 613 | * We only keep track of AccessExclusiveLocks, which are only ever held by |
| 614 | * one transaction on one relation. |
| 615 | * |
| 616 | * We keep a hash table of lists of locks in local memory keyed by xid, |
| 617 | * RecoveryLockLists, so we can keep track of the various entries made by |
| 618 | * the Startup process's virtual xid in the shared lock table. |
| 619 | * |
| 620 | * List elements use type xl_standby_lock, since the WAL record type exactly |
| 621 | * matches the information that we need to keep track of. |
| 622 | * |
| 623 | * We use session locks rather than normal locks so we don't need |
| 624 | * ResourceOwners. |
| 625 | */ |
| 626 | |
| 627 | |
/*
 * StandbyAcquireAccessExclusiveLock
 *		Re-acquire, in the Startup process, an AccessExclusiveLock that
 *		transaction xid holds on relation (dbOid, relOid) on the master,
 *		and record it in RecoveryLockLists for later release.
 *
 * Does nothing if xid is invalid or already known committed/aborted,
 * since in that case the lock is effectively gone already.
 */
void
StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
{
	RecoveryLockListsEntry *entry;
	xl_standby_lock *newlock;
	LOCKTAG		locktag;
	bool		found;

	/* Already processed? */
	if (!TransactionIdIsValid(xid) ||
		TransactionIdDidCommit(xid) ||
		TransactionIdDidAbort(xid))
		return;

	elog(trace_recovery(DEBUG4),
		 "adding recovery lock: db %u rel %u", dbOid, relOid);

	/* dbOid is InvalidOid when we are locking a shared relation. */
	Assert(OidIsValid(relOid));

	/* Create a new list for this xid, if we don't have one already. */
	entry = hash_search(RecoveryLockLists, &xid, HASH_ENTER, &found);
	if (!found)
	{
		entry->xid = xid;
		entry->locks = NIL;
	}

	/* Record the lock so StandbyReleaseLockList() can undo it later. */
	newlock = palloc(sizeof(xl_standby_lock));
	newlock->xid = xid;
	newlock->dbOid = dbOid;
	newlock->relOid = relOid;
	entry->locks = lappend(entry->locks, newlock);

	SET_LOCKTAG_RELATION(locktag, newlock->dbOid, newlock->relOid);

	/* Taken as a session lock (third arg), so no ResourceOwner is needed. */
	(void) LockAcquire(&locktag, AccessExclusiveLock, true, false);
}
| 666 | |
/*
 * StandbyReleaseLockList
 *		Release every lock in 'locks' and free the list.
 *
 * Each element is an xl_standby_lock recorded by
 * StandbyAcquireAccessExclusiveLock(); the corresponding session-level
 * AccessExclusiveLock is released, the element pfree'd, and the list cell
 * consumed via list_delete_first().
 */
static void
StandbyReleaseLockList(List *locks)
{
	while (locks)
	{
		xl_standby_lock *lock = (xl_standby_lock *) linitial(locks);
		LOCKTAG		locktag;

		elog(trace_recovery(DEBUG4),
			 "releasing recovery lock: xid %u db %u rel %u",
			 lock->xid, lock->dbOid, lock->relOid);
		SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid);
		if (!LockRelease(&locktag, AccessExclusiveLock, true))
		{
			/*
			 * Should not happen, since we only record locks we actually
			 * acquired; complain (and Assert in debug builds) but keep
			 * going so the remaining locks are still released.
			 */
			elog(LOG,
				 "RecoveryLockLists contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
				 lock->xid, lock->dbOid, lock->relOid);
			Assert(false);
		}
		pfree(lock);
		locks = list_delete_first(locks);
	}
}
| 690 | |
| 691 | static void |
| 692 | StandbyReleaseLocks(TransactionId xid) |
| 693 | { |
| 694 | RecoveryLockListsEntry *entry; |
| 695 | |
| 696 | if (TransactionIdIsValid(xid)) |
| 697 | { |
| 698 | if ((entry = hash_search(RecoveryLockLists, &xid, HASH_FIND, NULL))) |
| 699 | { |
| 700 | StandbyReleaseLockList(entry->locks); |
| 701 | hash_search(RecoveryLockLists, entry, HASH_REMOVE, NULL); |
| 702 | } |
| 703 | } |
| 704 | else |
| 705 | StandbyReleaseAllLocks(); |
| 706 | } |
| 707 | |
| 708 | /* |
| 709 | * Release locks for a transaction tree, starting at xid down, from |
| 710 | * RecoveryLockLists. |
| 711 | * |
| 712 | * Called during WAL replay of COMMIT/ROLLBACK when in hot standby mode, |
| 713 | * to remove any AccessExclusiveLocks requested by a transaction. |
| 714 | */ |
| 715 | void |
| 716 | StandbyReleaseLockTree(TransactionId xid, int nsubxids, TransactionId *subxids) |
| 717 | { |
| 718 | int i; |
| 719 | |
| 720 | StandbyReleaseLocks(xid); |
| 721 | |
| 722 | for (i = 0; i < nsubxids; i++) |
| 723 | StandbyReleaseLocks(subxids[i]); |
| 724 | } |
| 725 | |
| 726 | /* |
| 727 | * Called at end of recovery and when we see a shutdown checkpoint. |
| 728 | */ |
| 729 | void |
| 730 | StandbyReleaseAllLocks(void) |
| 731 | { |
| 732 | HASH_SEQ_STATUS status; |
| 733 | RecoveryLockListsEntry *entry; |
| 734 | |
| 735 | elog(trace_recovery(DEBUG2), "release all standby locks" ); |
| 736 | |
| 737 | hash_seq_init(&status, RecoveryLockLists); |
| 738 | while ((entry = hash_seq_search(&status))) |
| 739 | { |
| 740 | StandbyReleaseLockList(entry->locks); |
| 741 | hash_search(RecoveryLockLists, entry, HASH_REMOVE, NULL); |
| 742 | } |
| 743 | } |
| 744 | |
| 745 | /* |
| 746 | * StandbyReleaseOldLocks |
| 747 | * Release standby locks held by top-level XIDs that aren't running, |
| 748 | * as long as they're not prepared transactions. |
| 749 | */ |
| 750 | void |
| 751 | StandbyReleaseOldLocks(TransactionId oldxid) |
| 752 | { |
| 753 | HASH_SEQ_STATUS status; |
| 754 | RecoveryLockListsEntry *entry; |
| 755 | |
| 756 | hash_seq_init(&status, RecoveryLockLists); |
| 757 | while ((entry = hash_seq_search(&status))) |
| 758 | { |
| 759 | Assert(TransactionIdIsValid(entry->xid)); |
| 760 | |
| 761 | /* Skip if prepared transaction. */ |
| 762 | if (StandbyTransactionIdIsPrepared(entry->xid)) |
| 763 | continue; |
| 764 | |
| 765 | /* Skip if >= oldxid. */ |
| 766 | if (!TransactionIdPrecedes(entry->xid, oldxid)) |
| 767 | continue; |
| 768 | |
| 769 | /* Remove all locks and hash table entry. */ |
| 770 | StandbyReleaseLockList(entry->locks); |
| 771 | hash_search(RecoveryLockLists, entry, HASH_REMOVE, NULL); |
| 772 | } |
| 773 | } |
| 774 | |
| 775 | /* |
| 776 | * -------------------------------------------------------------------- |
| 777 | * Recovery handling for Rmgr RM_STANDBY_ID |
| 778 | * |
| 779 | * These record types will only be created if XLogStandbyInfoActive() |
| 780 | * -------------------------------------------------------------------- |
| 781 | */ |
| 782 | |
/*
 * standby_redo
 *		Redo function for RM_STANDBY_ID WAL records.
 *
 * Handles XLOG_STANDBY_LOCK (re-acquire AccessExclusiveLocks taken on the
 * master), XLOG_RUNNING_XACTS (hand a running-xacts snapshot to the
 * procarray) and XLOG_INVALIDATIONS (apply committed catalog
 * invalidations).  All of this is skipped unless hot standby is active.
 */
void
standby_redo(XLogReaderState *record)
{
	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

	/* Backup blocks are not used in standby records */
	Assert(!XLogRecHasAnyBlockRefs(record));

	/* Do nothing if we're not in hot standby mode */
	if (standbyState == STANDBY_DISABLED)
		return;

	if (info == XLOG_STANDBY_LOCK)
	{
		xl_standby_locks *xlrec = (xl_standby_locks *) XLogRecGetData(record);
		int			i;

		/* Re-take each recorded AccessExclusiveLock in the Startup process. */
		for (i = 0; i < xlrec->nlocks; i++)
			StandbyAcquireAccessExclusiveLock(xlrec->locks[i].xid,
											  xlrec->locks[i].dbOid,
											  xlrec->locks[i].relOid);
	}
	else if (info == XLOG_RUNNING_XACTS)
	{
		xl_running_xacts *xlrec = (xl_running_xacts *) XLogRecGetData(record);
		RunningTransactionsData running;

		/* Copy the WAL-record fields into the in-memory representation. */
		running.xcnt = xlrec->xcnt;
		running.subxcnt = xlrec->subxcnt;
		running.subxid_overflow = xlrec->subxid_overflow;
		running.nextXid = xlrec->nextXid;
		running.latestCompletedXid = xlrec->latestCompletedXid;
		running.oldestRunningXid = xlrec->oldestRunningXid;
		running.xids = xlrec->xids;

		ProcArrayApplyRecoveryInfo(&running);
	}
	else if (info == XLOG_INVALIDATIONS)
	{
		xl_invalidations *xlrec = (xl_invalidations *) XLogRecGetData(record);

		ProcessCommittedInvalidationMessages(xlrec->msgs,
											 xlrec->nmsgs,
											 xlrec->relcacheInitFileInval,
											 xlrec->dbId,
											 xlrec->tsId);
	}
	else
		elog(PANIC, "standby_redo: unknown op code %u", info);
}
| 833 | |
| 834 | /* |
| 835 | * Log details of the current snapshot to WAL. This allows the snapshot state |
| 836 | * to be reconstructed on the standby and for logical decoding. |
| 837 | * |
| 838 | * This is used for Hot Standby as follows: |
| 839 | * |
| 840 | * We can move directly to STANDBY_SNAPSHOT_READY at startup if we |
| 841 | * start from a shutdown checkpoint because we know nothing was running |
| 842 | * at that time and our recovery snapshot is known empty. In the more |
 *		typical case of an online checkpoint we need to jump through a few
 *		hoops to get a correct recovery snapshot, and this requires a two-
 *		or sometimes three-stage process.
| 846 | * |
| 847 | * The initial snapshot must contain all running xids and all current |
| 848 | * AccessExclusiveLocks at a point in time on the standby. Assembling |
| 849 | * that information while the server is running requires many and |
| 850 | * various LWLocks, so we choose to derive that information piece by |
| 851 | * piece and then re-assemble that info on the standby. When that |
| 852 | * information is fully assembled we move to STANDBY_SNAPSHOT_READY. |
| 853 | * |
| 854 | * Since locking on the primary when we derive the information is not |
| 855 | * strict, we note that there is a time window between the derivation and |
| 856 | * writing to WAL of the derived information. That allows race conditions |
| 857 | * that we must resolve, since xids and locks may enter or leave the |
| 858 | * snapshot during that window. This creates the issue that an xid or |
| 859 | * lock may start *after* the snapshot has been derived yet *before* the |
| 860 | * snapshot is logged in the running xacts WAL record. We resolve this by |
| 861 | * starting to accumulate changes at a point just prior to when we derive |
| 862 | * the snapshot on the primary, then ignore duplicates when we later apply |
| 863 | * the snapshot from the running xacts record. This is implemented during |
| 864 | * CreateCheckpoint() where we use the logical checkpoint location as |
| 865 | * our starting point and then write the running xacts record immediately |
| 866 | * before writing the main checkpoint WAL record. Since we always start |
| 867 | * up from a checkpoint and are immediately at our starting point, we |
| 868 | * unconditionally move to STANDBY_INITIALIZED. After this point we |
| 869 | * must do 4 things: |
| 870 | * * move shared nextFullXid forwards as we see new xids |
| 871 | * * extend the clog and subtrans with each new xid |
| 872 | * * keep track of uncommitted known assigned xids |
| 873 | * * keep track of uncommitted AccessExclusiveLocks |
| 874 | * |
| 875 | * When we see a commit/abort we must remove known assigned xids and locks |
| 876 | * from the completing transaction. Attempted removals that cannot locate |
| 877 | * an entry are expected and must not cause an error when we are in state |
| 878 | * STANDBY_INITIALIZED. This is implemented in StandbyReleaseLocks() and |
| 879 | * KnownAssignedXidsRemove(). |
| 880 | * |
| 881 | * Later, when we apply the running xact data we must be careful to ignore |
| 882 | * transactions already committed, since those commits raced ahead when |
| 883 | * making WAL entries. |
| 884 | * |
| 885 | * The loose timing also means that locks may be recorded that have a |
| 886 | * zero xid, since xids are removed from procs before locks are removed. |
| 887 | * So we must prune the lock list down to ensure we hold locks only for |
| 888 | * currently running xids, performed by StandbyReleaseOldLocks(). |
| 889 | * Zero xids should no longer be possible, but we may be replaying WAL |
| 890 | * from a time when they were possible. |
| 891 | * |
| 892 | * For logical decoding only the running xacts information is needed; |
| 893 | * there's no need to look at the locking information, but it's logged anyway, |
| 894 | * as there's no independent knob to just enable logical decoding. For |
| 895 | * details of how this is used, check snapbuild.c's introductory comment. |
| 896 | * |
| 897 | * |
| 898 | * Returns the RecPtr of the last inserted record. |
| 899 | */ |
XLogRecPtr
LogStandbySnapshot(void)
{
	XLogRecPtr	recptr;
	RunningTransactions running;
	xl_standby_lock *locks;
	int			nlocks;

	Assert(XLogStandbyInfoActive());

	/*
	 * Get details of any AccessExclusiveLocks being held at the moment, and
	 * log them first.  We are responsible for freeing the returned array.
	 */
	locks = GetRunningTransactionLocks(&nlocks);
	if (nlocks > 0)
		LogAccessExclusiveLocks(nlocks, locks);
	pfree(locks);

	/*
	 * Log details of all in-progress transactions.  This should be the last
	 * record we write, because standby will open up when it sees this.
	 */
	running = GetRunningTransactionData();

	/*
	 * GetRunningTransactionData() acquired ProcArrayLock; we must release
	 * it.  For Hot Standby this can be done before inserting the WAL record
	 * because ProcArrayApplyRecoveryInfo() rechecks the commit status using
	 * the clog.  For logical decoding, though, the lock can't be released
	 * early because the clog might be "in the future" from the POV of the
	 * historic snapshot.  This would allow for situations where we're waiting
	 * for the end of a transaction listed in the xl_running_xacts record
	 * which, according to the WAL, has committed before the xl_running_xacts
	 * record.  Fortunately this routine isn't executed frequently, and it's
	 * only a shared lock.
	 */
	if (wal_level < WAL_LEVEL_LOGICAL)
		LWLockRelease(ProcArrayLock);

	recptr = LogCurrentRunningXacts(running);

	/* Release lock if we kept it longer (wal_level >= logical) ... */
	if (wal_level >= WAL_LEVEL_LOGICAL)
		LWLockRelease(ProcArrayLock);

	/* GetRunningTransactionData() acquired XidGenLock, we must release it */
	LWLockRelease(XidGenLock);

	return recptr;
}
| 950 | |
| 951 | /* |
| 952 | * Record an enhanced snapshot of running transactions into WAL. |
| 953 | * |
| 954 | * The definitions of RunningTransactionsData and xl_xact_running_xacts are |
| 955 | * similar. We keep them separate because xl_xact_running_xacts is a |
| 956 | * contiguous chunk of memory and never exists fully until it is assembled in |
| 957 | * WAL. The inserted records are marked as not being important for durability, |
| 958 | * to avoid triggering superfluous checkpoint / archiving activity. |
| 959 | */ |
| 960 | static XLogRecPtr |
| 961 | LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) |
| 962 | { |
| 963 | xl_running_xacts xlrec; |
| 964 | XLogRecPtr recptr; |
| 965 | |
| 966 | xlrec.xcnt = CurrRunningXacts->xcnt; |
| 967 | xlrec.subxcnt = CurrRunningXacts->subxcnt; |
| 968 | xlrec.subxid_overflow = CurrRunningXacts->subxid_overflow; |
| 969 | xlrec.nextXid = CurrRunningXacts->nextXid; |
| 970 | xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid; |
| 971 | xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid; |
| 972 | |
| 973 | /* Header */ |
| 974 | XLogBeginInsert(); |
| 975 | XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT); |
| 976 | XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts); |
| 977 | |
| 978 | /* array of TransactionIds */ |
| 979 | if (xlrec.xcnt > 0) |
| 980 | XLogRegisterData((char *) CurrRunningXacts->xids, |
| 981 | (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId)); |
| 982 | |
| 983 | recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS); |
| 984 | |
| 985 | if (CurrRunningXacts->subxid_overflow) |
| 986 | elog(trace_recovery(DEBUG2), |
| 987 | "snapshot of %u running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)" , |
| 988 | CurrRunningXacts->xcnt, |
| 989 | (uint32) (recptr >> 32), (uint32) recptr, |
| 990 | CurrRunningXacts->oldestRunningXid, |
| 991 | CurrRunningXacts->latestCompletedXid, |
| 992 | CurrRunningXacts->nextXid); |
| 993 | else |
| 994 | elog(trace_recovery(DEBUG2), |
| 995 | "snapshot of %u+%u running transaction ids (lsn %X/%X oldest xid %u latest complete %u next xid %u)" , |
| 996 | CurrRunningXacts->xcnt, CurrRunningXacts->subxcnt, |
| 997 | (uint32) (recptr >> 32), (uint32) recptr, |
| 998 | CurrRunningXacts->oldestRunningXid, |
| 999 | CurrRunningXacts->latestCompletedXid, |
| 1000 | CurrRunningXacts->nextXid); |
| 1001 | |
| 1002 | /* |
| 1003 | * Ensure running_xacts information is synced to disk not too far in the |
| 1004 | * future. We don't want to stall anything though (i.e. use XLogFlush()), |
| 1005 | * so we let the wal writer do it during normal operation. |
| 1006 | * XLogSetAsyncXactLSN() conveniently will mark the LSN as to-be-synced |
| 1007 | * and nudge the WALWriter into action if sleeping. Check |
| 1008 | * XLogBackgroundFlush() for details why a record might not be flushed |
| 1009 | * without it. |
| 1010 | */ |
| 1011 | XLogSetAsyncXactLSN(recptr); |
| 1012 | |
| 1013 | return recptr; |
| 1014 | } |
| 1015 | |
| 1016 | /* |
| 1017 | * Wholesale logging of AccessExclusiveLocks. Other lock types need not be |
| 1018 | * logged, as described in backend/storage/lmgr/README. |
| 1019 | */ |
| 1020 | static void |
| 1021 | LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks) |
| 1022 | { |
| 1023 | xl_standby_locks xlrec; |
| 1024 | |
| 1025 | xlrec.nlocks = nlocks; |
| 1026 | |
| 1027 | XLogBeginInsert(); |
| 1028 | XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks)); |
| 1029 | XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock)); |
| 1030 | XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT); |
| 1031 | |
| 1032 | (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK); |
| 1033 | } |
| 1034 | |
| 1035 | /* |
| 1036 | * Individual logging of AccessExclusiveLocks for use during LockAcquire() |
| 1037 | */ |
| 1038 | void |
| 1039 | LogAccessExclusiveLock(Oid dbOid, Oid relOid) |
| 1040 | { |
| 1041 | xl_standby_lock xlrec; |
| 1042 | |
| 1043 | xlrec.xid = GetCurrentTransactionId(); |
| 1044 | |
| 1045 | xlrec.dbOid = dbOid; |
| 1046 | xlrec.relOid = relOid; |
| 1047 | |
| 1048 | LogAccessExclusiveLocks(1, &xlrec); |
| 1049 | MyXactFlags |= XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK; |
| 1050 | } |
| 1051 | |
| 1052 | /* |
| 1053 | * Prepare to log an AccessExclusiveLock, for use during LockAcquire() |
| 1054 | */ |
| 1055 | void |
| 1056 | LogAccessExclusiveLockPrepare(void) |
| 1057 | { |
| 1058 | /* |
| 1059 | * Ensure that a TransactionId has been assigned to this transaction, for |
| 1060 | * two reasons, both related to lock release on the standby. First, we |
| 1061 | * must assign an xid so that RecordTransactionCommit() and |
| 1062 | * RecordTransactionAbort() do not optimise away the transaction |
| 1063 | * completion record which recovery relies upon to release locks. It's a |
| 1064 | * hack, but for a corner case not worth adding code for into the main |
| 1065 | * commit path. Second, we must assign an xid before the lock is recorded |
| 1066 | * in shared memory, otherwise a concurrently executing |
| 1067 | * GetRunningTransactionLocks() might see a lock associated with an |
| 1068 | * InvalidTransactionId which we later assert cannot happen. |
| 1069 | */ |
| 1070 | (void) GetCurrentTransactionId(); |
| 1071 | } |
| 1072 | |
| 1073 | /* |
| 1074 | * Emit WAL for invalidations. This currently is only used for commits without |
| 1075 | * an xid but which contain invalidations. |
| 1076 | */ |
| 1077 | void |
| 1078 | LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs, |
| 1079 | bool relcacheInitFileInval) |
| 1080 | { |
| 1081 | xl_invalidations xlrec; |
| 1082 | |
| 1083 | /* prepare record */ |
| 1084 | memset(&xlrec, 0, sizeof(xlrec)); |
| 1085 | xlrec.dbId = MyDatabaseId; |
| 1086 | xlrec.tsId = MyDatabaseTableSpace; |
| 1087 | xlrec.relcacheInitFileInval = relcacheInitFileInval; |
| 1088 | xlrec.nmsgs = nmsgs; |
| 1089 | |
| 1090 | /* perform insertion */ |
| 1091 | XLogBeginInsert(); |
| 1092 | XLogRegisterData((char *) (&xlrec), MinSizeOfInvalidations); |
| 1093 | XLogRegisterData((char *) msgs, |
| 1094 | nmsgs * sizeof(SharedInvalidationMessage)); |
| 1095 | XLogInsert(RM_STANDBY_ID, XLOG_INVALIDATIONS); |
| 1096 | } |
| 1097 | |