| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * clog.c |
| 4 | * PostgreSQL transaction-commit-log manager |
| 5 | * |
| 6 | * This module replaces the old "pg_log" access code, which treated pg_log |
| 7 | * essentially like a relation, in that it went through the regular buffer |
| 8 | * manager. The problem with that was that there wasn't any good way to |
| 9 | * recycle storage space for transactions so old that they'll never be |
| 10 | * looked up again. Now we use specialized access code so that the commit |
| 11 | * log can be broken into relatively small, independent segments. |
| 12 | * |
| 13 | * XLOG interactions: this module generates an XLOG record whenever a new |
| 14 | * CLOG page is initialized to zeroes. Other writes of CLOG come from |
| 15 | * recording of transaction commit or abort in xact.c, which generates its |
| 16 | * own XLOG records for these events and will re-perform the status update |
| 17 | * on redo; so we need make no additional XLOG entry here. For synchronous |
| 18 | * transaction commits, the XLOG is guaranteed flushed through the XLOG commit |
| 19 | * record before we are called to log a commit, so the WAL rule "write xlog |
| 20 | * before data" is satisfied automatically. However, for async commits we |
| 21 | * must track the latest LSN affecting each CLOG page, so that we can flush |
| 22 | * XLOG that far and satisfy the WAL rule. We don't have to worry about this |
| 23 | * for aborts (whether sync or async), since the post-crash assumption would |
| 24 | * be that such transactions failed anyway. |
| 25 | * |
| 26 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 27 | * Portions Copyright (c) 1994, Regents of the University of California |
| 28 | * |
| 29 | * src/backend/access/transam/clog.c |
| 30 | * |
| 31 | *------------------------------------------------------------------------- |
| 32 | */ |
| 33 | #include "postgres.h" |
| 34 | |
| 35 | #include "access/clog.h" |
| 36 | #include "access/slru.h" |
| 37 | #include "access/transam.h" |
| 38 | #include "access/xlog.h" |
| 39 | #include "access/xloginsert.h" |
| 40 | #include "access/xlogutils.h" |
| 41 | #include "miscadmin.h" |
| 42 | #include "pgstat.h" |
| 43 | #include "pg_trace.h" |
| 44 | #include "storage/proc.h" |
| 45 | |
| 46 | /* |
| 47 | * Defines for CLOG page sizes. A page is the same BLCKSZ as is used |
| 48 | * everywhere else in Postgres. |
| 49 | * |
| 50 | * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF, |
| 51 | * CLOG page numbering also wraps around at 0xFFFFFFFF/CLOG_XACTS_PER_PAGE, |
| 52 | * and CLOG segment numbering at |
| 53 | * 0xFFFFFFFF/CLOG_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need take no |
| 54 | * explicit notice of that fact in this module, except when comparing segment |
| 55 | * and page numbers in TruncateCLOG (see CLOGPagePrecedes). |
| 56 | */ |
| 57 | |
/*
 * Each transaction's status occupies two bits, so the status of four
 * transactions is packed into every byte of a CLOG page.
 */
#define CLOG_BITS_PER_XACT	2
#define CLOG_XACTS_PER_BYTE 4
#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
#define CLOG_XACT_BITMASK	((1 << CLOG_BITS_PER_XACT) - 1)

/*
 * Address arithmetic for one transaction's status bits:
 *	 TransactionIdToPage	- number of the CLOG page holding the xid
 *	 TransactionIdToPgIndex - position of the xid within that page
 *	 TransactionIdToByte	- byte offset of the xid within that page
 *	 TransactionIdToBIndex	- position of the xid within that byte
 */
#define TransactionIdToPage(xid) \
	((xid) / (TransactionId) CLOG_XACTS_PER_PAGE)
#define TransactionIdToPgIndex(xid) \
	((xid) % (TransactionId) CLOG_XACTS_PER_PAGE)
#define TransactionIdToByte(xid) \
	(TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
#define TransactionIdToBIndex(xid) \
	((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)

/*
 * The latest async-commit LSN is remembered once per group of transactions
 * rather than per transaction.  The group size must stay a power of 2.
 */
#define CLOG_XACTS_PER_LSN_GROUP	32
#define CLOG_LSNS_PER_PAGE \
	(CLOG_XACTS_PER_PAGE / CLOG_XACTS_PER_LSN_GROUP)

/* Index into the group_lsn array for the given buffer slot and xid */
#define GetLSNIndex(slotno, xid) \
	((slotno) * CLOG_LSNS_PER_PAGE + \
	 TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_LSN_GROUP)

/*
 * Upper limit on the number of subtransactions for which the clog group
 * update optimization is attempted.  Testing showed that applying it to
 * transactions with more subtransactions than this can hurt performance.
 */
#define THRESHOLD_SUBTRANS_CLOG_OPT 5
| 82 | |
| 83 | /* |
| 84 | * Link to shared-memory data structures for CLOG control |
| 85 | */ |
| 86 | static SlruCtlData ClogCtlData; |
| 87 | |
| 88 | #define ClogCtl (&ClogCtlData) |
| 89 | |
| 90 | |
| 91 | static int ZeroCLOGPage(int pageno, bool writeXlog); |
| 92 | static bool CLOGPagePrecedes(int page1, int page2); |
| 93 | static void WriteZeroPageXlogRec(int pageno); |
| 94 | static void WriteTruncateXlogRec(int pageno, TransactionId oldestXact, |
| 95 | Oid oldestXidDb); |
| 96 | static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, |
| 97 | TransactionId *subxids, XidStatus status, |
| 98 | XLogRecPtr lsn, int pageno, |
| 99 | bool all_xact_same_page); |
| 100 | static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status, |
| 101 | XLogRecPtr lsn, int slotno); |
| 102 | static void set_status_by_pages(int nsubxids, TransactionId *subxids, |
| 103 | XidStatus status, XLogRecPtr lsn); |
| 104 | static bool TransactionGroupUpdateXidStatus(TransactionId xid, |
| 105 | XidStatus status, XLogRecPtr lsn, int pageno); |
| 106 | static void TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids, |
| 107 | TransactionId *subxids, XidStatus status, |
| 108 | XLogRecPtr lsn, int pageno); |
| 109 | |
| 110 | |
| 111 | /* |
| 112 | * TransactionIdSetTreeStatus |
| 113 | * |
| 114 | * Record the final state of transaction entries in the commit log for |
| 115 | * a transaction and its subtransaction tree. Take care to ensure this is |
| 116 | * efficient, and as atomic as possible. |
| 117 | * |
| 118 | * xid is a single xid to set status for. This will typically be |
| 119 | * the top level transactionid for a top level commit or abort. It can |
| 120 | * also be a subtransaction when we record transaction aborts. |
| 121 | * |
| 122 | * subxids is an array of xids of length nsubxids, representing subtransactions |
| 123 | * in the tree of xid. In various cases nsubxids may be zero. |
| 124 | * |
| 125 | * lsn must be the WAL location of the commit record when recording an async |
| 126 | * commit. For a synchronous commit it can be InvalidXLogRecPtr, since the |
| 127 | * caller guarantees the commit record is already flushed in that case. It |
| 128 | * should be InvalidXLogRecPtr for abort cases, too. |
| 129 | * |
| 130 | * In the commit case, atomicity is limited by whether all the subxids are in |
| 131 | * the same CLOG page as xid. If they all are, then the lock will be grabbed |
| 132 | * only once, and the status will be set to committed directly. Otherwise |
| 133 | * we must |
| 134 | * 1. set sub-committed all subxids that are not on the same page as the |
| 135 | * main xid |
| 136 | * 2. atomically set committed the main xid and the subxids on the same page |
| 137 | * 3. go over the first bunch again and set them committed |
| 138 | * Note that as far as concurrent checkers are concerned, main transaction |
| 139 | * commit as a whole is still atomic. |
| 140 | * |
| 141 | * Example: |
| 142 | * TransactionId t commits and has subxids t1, t2, t3, t4 |
| 143 | * t is on page p1, t1 is also on p1, t2 and t3 are on p2, t4 is on p3 |
| 144 | * 1. update pages2-3: |
| 145 | * page2: set t2,t3 as sub-committed |
| 146 | * page3: set t4 as sub-committed |
| 147 | * 2. update page1: |
| 148 | * set t1 as sub-committed, |
| 149 | * then set t as committed, |
| 150 | then set t1 as committed |
| 151 | * 3. update pages2-3: |
| 152 | * page2: set t2,t3 as committed |
| 153 | * page3: set t4 as committed |
| 154 | * |
| 155 | * NB: this is a low-level routine and is NOT the preferred entry point |
| 156 | * for most uses; functions in transam.c are the intended callers. |
| 157 | * |
| 158 | * XXX Think about issuing FADVISE_WILLNEED on pages that we will need, |
| 159 | * but aren't yet in cache, as well as hinting pages not to fall out of |
| 160 | * cache yet. |
| 161 | */ |
| 162 | void |
| 163 | TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, |
| 164 | TransactionId *subxids, XidStatus status, XLogRecPtr lsn) |
| 165 | { |
| 166 | int pageno = TransactionIdToPage(xid); /* get page of parent */ |
| 167 | int i; |
| 168 | |
| 169 | Assert(status == TRANSACTION_STATUS_COMMITTED || |
| 170 | status == TRANSACTION_STATUS_ABORTED); |
| 171 | |
| 172 | /* |
| 173 | * See how many subxids, if any, are on the same page as the parent, if |
| 174 | * any. |
| 175 | */ |
| 176 | for (i = 0; i < nsubxids; i++) |
| 177 | { |
| 178 | if (TransactionIdToPage(subxids[i]) != pageno) |
| 179 | break; |
| 180 | } |
| 181 | |
| 182 | /* |
| 183 | * Do all items fit on a single page? |
| 184 | */ |
| 185 | if (i == nsubxids) |
| 186 | { |
| 187 | /* |
| 188 | * Set the parent and all subtransactions in a single call |
| 189 | */ |
| 190 | TransactionIdSetPageStatus(xid, nsubxids, subxids, status, lsn, |
| 191 | pageno, true); |
| 192 | } |
| 193 | else |
| 194 | { |
| 195 | int nsubxids_on_first_page = i; |
| 196 | |
| 197 | /* |
| 198 | * If this is a commit then we care about doing this correctly (i.e. |
| 199 | * using the subcommitted intermediate status). By here, we know |
| 200 | * we're updating more than one page of clog, so we must mark entries |
| 201 | * that are *not* on the first page so that they show as subcommitted |
| 202 | * before we then return to update the status to fully committed. |
| 203 | * |
| 204 | * To avoid touching the first page twice, skip marking subcommitted |
| 205 | * for the subxids on that first page. |
| 206 | */ |
| 207 | if (status == TRANSACTION_STATUS_COMMITTED) |
| 208 | set_status_by_pages(nsubxids - nsubxids_on_first_page, |
| 209 | subxids + nsubxids_on_first_page, |
| 210 | TRANSACTION_STATUS_SUB_COMMITTED, lsn); |
| 211 | |
| 212 | /* |
| 213 | * Now set the parent and subtransactions on same page as the parent, |
| 214 | * if any |
| 215 | */ |
| 216 | pageno = TransactionIdToPage(xid); |
| 217 | TransactionIdSetPageStatus(xid, nsubxids_on_first_page, subxids, status, |
| 218 | lsn, pageno, false); |
| 219 | |
| 220 | /* |
| 221 | * Now work through the rest of the subxids one clog page at a time, |
| 222 | * starting from the second page onwards, like we did above. |
| 223 | */ |
| 224 | set_status_by_pages(nsubxids - nsubxids_on_first_page, |
| 225 | subxids + nsubxids_on_first_page, |
| 226 | status, lsn); |
| 227 | } |
| 228 | } |
| 229 | |
| 230 | /* |
| 231 | * Helper for TransactionIdSetTreeStatus: set the status for a bunch of |
| 232 | * transactions, chunking in the separate CLOG pages involved. We never |
| 233 | * pass the whole transaction tree to this function, only subtransactions |
| 234 | * that are on different pages to the top level transaction id. |
| 235 | */ |
| 236 | static void |
| 237 | set_status_by_pages(int nsubxids, TransactionId *subxids, |
| 238 | XidStatus status, XLogRecPtr lsn) |
| 239 | { |
| 240 | int pageno = TransactionIdToPage(subxids[0]); |
| 241 | int offset = 0; |
| 242 | int i = 0; |
| 243 | |
| 244 | Assert(nsubxids > 0); /* else the pageno fetch above is unsafe */ |
| 245 | |
| 246 | while (i < nsubxids) |
| 247 | { |
| 248 | int num_on_page = 0; |
| 249 | int nextpageno; |
| 250 | |
| 251 | do |
| 252 | { |
| 253 | nextpageno = TransactionIdToPage(subxids[i]); |
| 254 | if (nextpageno != pageno) |
| 255 | break; |
| 256 | num_on_page++; |
| 257 | i++; |
| 258 | } while (i < nsubxids); |
| 259 | |
| 260 | TransactionIdSetPageStatus(InvalidTransactionId, |
| 261 | num_on_page, subxids + offset, |
| 262 | status, lsn, pageno, false); |
| 263 | offset = i; |
| 264 | pageno = nextpageno; |
| 265 | } |
| 266 | } |
| 267 | |
| 268 | /* |
| 269 | * Record the final state of transaction entries in the commit log for all |
| 270 | * entries on a single page. Atomic only on this page. |
| 271 | */ |
| 272 | static void |
| 273 | TransactionIdSetPageStatus(TransactionId xid, int nsubxids, |
| 274 | TransactionId *subxids, XidStatus status, |
| 275 | XLogRecPtr lsn, int pageno, |
| 276 | bool all_xact_same_page) |
| 277 | { |
| 278 | /* Can't use group update when PGPROC overflows. */ |
| 279 | StaticAssertStmt(THRESHOLD_SUBTRANS_CLOG_OPT <= PGPROC_MAX_CACHED_SUBXIDS, |
| 280 | "group clog threshold less than PGPROC cached subxids" ); |
| 281 | |
| 282 | /* |
| 283 | * When there is contention on CLogControlLock, we try to group multiple |
| 284 | * updates; a single leader process will perform transaction status |
| 285 | * updates for multiple backends so that the number of times |
| 286 | * CLogControlLock needs to be acquired is reduced. |
| 287 | * |
| 288 | * For this optimization to be safe, the XID in MyPgXact and the subxids |
| 289 | * in MyProc must be the same as the ones for which we're setting the |
| 290 | * status. Check that this is the case. |
| 291 | * |
| 292 | * For this optimization to be efficient, we shouldn't have too many |
| 293 | * sub-XIDs and all of the XIDs for which we're adjusting clog should be |
| 294 | * on the same page. Check those conditions, too. |
| 295 | */ |
| 296 | if (all_xact_same_page && xid == MyPgXact->xid && |
| 297 | nsubxids <= THRESHOLD_SUBTRANS_CLOG_OPT && |
| 298 | nsubxids == MyPgXact->nxids && |
| 299 | memcmp(subxids, MyProc->subxids.xids, |
| 300 | nsubxids * sizeof(TransactionId)) == 0) |
| 301 | { |
| 302 | /* |
| 303 | * We don't try to do group update optimization if a process has |
| 304 | * overflowed the subxids array in its PGPROC, since in that case we |
| 305 | * don't have a complete list of XIDs for it. |
| 306 | */ |
| 307 | Assert(THRESHOLD_SUBTRANS_CLOG_OPT <= PGPROC_MAX_CACHED_SUBXIDS); |
| 308 | |
| 309 | /* |
| 310 | * If we can immediately acquire CLogControlLock, we update the status |
| 311 | * of our own XID and release the lock. If not, try use group XID |
| 312 | * update. If that doesn't work out, fall back to waiting for the |
| 313 | * lock to perform an update for this transaction only. |
| 314 | */ |
| 315 | if (LWLockConditionalAcquire(CLogControlLock, LW_EXCLUSIVE)) |
| 316 | { |
| 317 | /* Got the lock without waiting! Do the update. */ |
| 318 | TransactionIdSetPageStatusInternal(xid, nsubxids, subxids, status, |
| 319 | lsn, pageno); |
| 320 | LWLockRelease(CLogControlLock); |
| 321 | return; |
| 322 | } |
| 323 | else if (TransactionGroupUpdateXidStatus(xid, status, lsn, pageno)) |
| 324 | { |
| 325 | /* Group update mechanism has done the work. */ |
| 326 | return; |
| 327 | } |
| 328 | |
| 329 | /* Fall through only if update isn't done yet. */ |
| 330 | } |
| 331 | |
| 332 | /* Group update not applicable, or couldn't accept this page number. */ |
| 333 | LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); |
| 334 | TransactionIdSetPageStatusInternal(xid, nsubxids, subxids, status, |
| 335 | lsn, pageno); |
| 336 | LWLockRelease(CLogControlLock); |
| 337 | } |
| 338 | |
| 339 | /* |
| 340 | * Record the final state of transaction entry in the commit log |
| 341 | * |
| 342 | * We don't do any locking here; caller must handle that. |
| 343 | */ |
| 344 | static void |
| 345 | TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids, |
| 346 | TransactionId *subxids, XidStatus status, |
| 347 | XLogRecPtr lsn, int pageno) |
| 348 | { |
| 349 | int slotno; |
| 350 | int i; |
| 351 | |
| 352 | Assert(status == TRANSACTION_STATUS_COMMITTED || |
| 353 | status == TRANSACTION_STATUS_ABORTED || |
| 354 | (status == TRANSACTION_STATUS_SUB_COMMITTED && !TransactionIdIsValid(xid))); |
| 355 | Assert(LWLockHeldByMeInMode(CLogControlLock, LW_EXCLUSIVE)); |
| 356 | |
| 357 | /* |
| 358 | * If we're doing an async commit (ie, lsn is valid), then we must wait |
| 359 | * for any active write on the page slot to complete. Otherwise our |
| 360 | * update could reach disk in that write, which will not do since we |
| 361 | * mustn't let it reach disk until we've done the appropriate WAL flush. |
| 362 | * But when lsn is invalid, it's OK to scribble on a page while it is |
| 363 | * write-busy, since we don't care if the update reaches disk sooner than |
| 364 | * we think. |
| 365 | */ |
| 366 | slotno = SimpleLruReadPage(ClogCtl, pageno, XLogRecPtrIsInvalid(lsn), xid); |
| 367 | |
| 368 | /* |
| 369 | * Set the main transaction id, if any. |
| 370 | * |
| 371 | * If we update more than one xid on this page while it is being written |
| 372 | * out, we might find that some of the bits go to disk and others don't. |
| 373 | * If we are updating commits on the page with the top-level xid that |
| 374 | * could break atomicity, so we subcommit the subxids first before we mark |
| 375 | * the top-level commit. |
| 376 | */ |
| 377 | if (TransactionIdIsValid(xid)) |
| 378 | { |
| 379 | /* Subtransactions first, if needed ... */ |
| 380 | if (status == TRANSACTION_STATUS_COMMITTED) |
| 381 | { |
| 382 | for (i = 0; i < nsubxids; i++) |
| 383 | { |
| 384 | Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i])); |
| 385 | TransactionIdSetStatusBit(subxids[i], |
| 386 | TRANSACTION_STATUS_SUB_COMMITTED, |
| 387 | lsn, slotno); |
| 388 | } |
| 389 | } |
| 390 | |
| 391 | /* ... then the main transaction */ |
| 392 | TransactionIdSetStatusBit(xid, status, lsn, slotno); |
| 393 | } |
| 394 | |
| 395 | /* Set the subtransactions */ |
| 396 | for (i = 0; i < nsubxids; i++) |
| 397 | { |
| 398 | Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i])); |
| 399 | TransactionIdSetStatusBit(subxids[i], status, lsn, slotno); |
| 400 | } |
| 401 | |
| 402 | ClogCtl->shared->page_dirty[slotno] = true; |
| 403 | } |
| 404 | |
| 405 | /* |
| 406 | * When we cannot immediately acquire CLogControlLock in exclusive mode at |
| 407 | * commit time, add ourselves to a list of processes that need their XIDs |
| 408 | * status update. The first process to add itself to the list will acquire |
| 409 | * CLogControlLock in exclusive mode and set transaction status as required |
| 410 | * on behalf of all group members. This avoids a great deal of contention |
| 411 | * around CLogControlLock when many processes are trying to commit at once, |
| 412 | * since the lock need not be repeatedly handed off from one committing |
| 413 | * process to the next. |
| 414 | * |
| 415 | * Returns true when transaction status has been updated in clog; returns |
| 416 | * false if we decided against applying the optimization because the page |
| 417 | * number we need to update differs from those processes already waiting. |
| 418 | */ |
| 419 | static bool |
| 420 | TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status, |
| 421 | XLogRecPtr lsn, int pageno) |
| 422 | { |
| 423 | volatile PROC_HDR *procglobal = ProcGlobal; |
| 424 | PGPROC *proc = MyProc; |
| 425 | uint32 nextidx; |
| 426 | uint32 wakeidx; |
| 427 | |
| 428 | /* We should definitely have an XID whose status needs to be updated. */ |
| 429 | Assert(TransactionIdIsValid(xid)); |
| 430 | |
| 431 | /* |
| 432 | * Add ourselves to the list of processes needing a group XID status |
| 433 | * update. |
| 434 | */ |
| 435 | proc->clogGroupMember = true; |
| 436 | proc->clogGroupMemberXid = xid; |
| 437 | proc->clogGroupMemberXidStatus = status; |
| 438 | proc->clogGroupMemberPage = pageno; |
| 439 | proc->clogGroupMemberLsn = lsn; |
| 440 | |
| 441 | nextidx = pg_atomic_read_u32(&procglobal->clogGroupFirst); |
| 442 | |
| 443 | while (true) |
| 444 | { |
| 445 | /* |
| 446 | * Add the proc to list, if the clog page where we need to update the |
| 447 | * current transaction status is same as group leader's clog page. |
| 448 | * |
| 449 | * There is a race condition here, which is that after doing the below |
| 450 | * check and before adding this proc's clog update to a group, the |
| 451 | * group leader might have already finished the group update for this |
| 452 | * page and becomes group leader of another group. This will lead to a |
| 453 | * situation where a single group can have different clog page |
| 454 | * updates. This isn't likely and will still work, just maybe a bit |
| 455 | * less efficiently. |
| 456 | */ |
| 457 | if (nextidx != INVALID_PGPROCNO && |
| 458 | ProcGlobal->allProcs[nextidx].clogGroupMemberPage != proc->clogGroupMemberPage) |
| 459 | { |
| 460 | proc->clogGroupMember = false; |
| 461 | return false; |
| 462 | } |
| 463 | |
| 464 | pg_atomic_write_u32(&proc->clogGroupNext, nextidx); |
| 465 | |
| 466 | if (pg_atomic_compare_exchange_u32(&procglobal->clogGroupFirst, |
| 467 | &nextidx, |
| 468 | (uint32) proc->pgprocno)) |
| 469 | break; |
| 470 | } |
| 471 | |
| 472 | /* |
| 473 | * If the list was not empty, the leader will update the status of our |
| 474 | * XID. It is impossible to have followers without a leader because the |
| 475 | * first process that has added itself to the list will always have |
| 476 | * nextidx as INVALID_PGPROCNO. |
| 477 | */ |
| 478 | if (nextidx != INVALID_PGPROCNO) |
| 479 | { |
| 480 | int = 0; |
| 481 | |
| 482 | /* Sleep until the leader updates our XID status. */ |
| 483 | pgstat_report_wait_start(WAIT_EVENT_CLOG_GROUP_UPDATE); |
| 484 | for (;;) |
| 485 | { |
| 486 | /* acts as a read barrier */ |
| 487 | PGSemaphoreLock(proc->sem); |
| 488 | if (!proc->clogGroupMember) |
| 489 | break; |
| 490 | extraWaits++; |
| 491 | } |
| 492 | pgstat_report_wait_end(); |
| 493 | |
| 494 | Assert(pg_atomic_read_u32(&proc->clogGroupNext) == INVALID_PGPROCNO); |
| 495 | |
| 496 | /* Fix semaphore count for any absorbed wakeups */ |
| 497 | while (extraWaits-- > 0) |
| 498 | PGSemaphoreUnlock(proc->sem); |
| 499 | return true; |
| 500 | } |
| 501 | |
| 502 | /* We are the leader. Acquire the lock on behalf of everyone. */ |
| 503 | LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); |
| 504 | |
| 505 | /* |
| 506 | * Now that we've got the lock, clear the list of processes waiting for |
| 507 | * group XID status update, saving a pointer to the head of the list. |
| 508 | * Trying to pop elements one at a time could lead to an ABA problem. |
| 509 | */ |
| 510 | nextidx = pg_atomic_exchange_u32(&procglobal->clogGroupFirst, |
| 511 | INVALID_PGPROCNO); |
| 512 | |
| 513 | /* Remember head of list so we can perform wakeups after dropping lock. */ |
| 514 | wakeidx = nextidx; |
| 515 | |
| 516 | /* Walk the list and update the status of all XIDs. */ |
| 517 | while (nextidx != INVALID_PGPROCNO) |
| 518 | { |
| 519 | PGPROC *proc = &ProcGlobal->allProcs[nextidx]; |
| 520 | PGXACT *pgxact = &ProcGlobal->allPgXact[nextidx]; |
| 521 | |
| 522 | /* |
| 523 | * Overflowed transactions should not use group XID status update |
| 524 | * mechanism. |
| 525 | */ |
| 526 | Assert(!pgxact->overflowed); |
| 527 | |
| 528 | TransactionIdSetPageStatusInternal(proc->clogGroupMemberXid, |
| 529 | pgxact->nxids, |
| 530 | proc->subxids.xids, |
| 531 | proc->clogGroupMemberXidStatus, |
| 532 | proc->clogGroupMemberLsn, |
| 533 | proc->clogGroupMemberPage); |
| 534 | |
| 535 | /* Move to next proc in list. */ |
| 536 | nextidx = pg_atomic_read_u32(&proc->clogGroupNext); |
| 537 | } |
| 538 | |
| 539 | /* We're done with the lock now. */ |
| 540 | LWLockRelease(CLogControlLock); |
| 541 | |
| 542 | /* |
| 543 | * Now that we've released the lock, go back and wake everybody up. We |
| 544 | * don't do this under the lock so as to keep lock hold times to a |
| 545 | * minimum. |
| 546 | */ |
| 547 | while (wakeidx != INVALID_PGPROCNO) |
| 548 | { |
| 549 | PGPROC *proc = &ProcGlobal->allProcs[wakeidx]; |
| 550 | |
| 551 | wakeidx = pg_atomic_read_u32(&proc->clogGroupNext); |
| 552 | pg_atomic_write_u32(&proc->clogGroupNext, INVALID_PGPROCNO); |
| 553 | |
| 554 | /* ensure all previous writes are visible before follower continues. */ |
| 555 | pg_write_barrier(); |
| 556 | |
| 557 | proc->clogGroupMember = false; |
| 558 | |
| 559 | if (proc != MyProc) |
| 560 | PGSemaphoreUnlock(proc->sem); |
| 561 | } |
| 562 | |
| 563 | return true; |
| 564 | } |
| 565 | |
| 566 | /* |
| 567 | * Sets the commit status of a single transaction. |
| 568 | * |
| 569 | * Must be called with CLogControlLock held |
| 570 | */ |
| 571 | static void |
| 572 | TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno) |
| 573 | { |
| 574 | int byteno = TransactionIdToByte(xid); |
| 575 | int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; |
| 576 | char *byteptr; |
| 577 | char byteval; |
| 578 | char curval; |
| 579 | |
| 580 | byteptr = ClogCtl->shared->page_buffer[slotno] + byteno; |
| 581 | curval = (*byteptr >> bshift) & CLOG_XACT_BITMASK; |
| 582 | |
| 583 | /* |
| 584 | * When replaying transactions during recovery we still need to perform |
| 585 | * the two phases of subcommit and then commit. However, some transactions |
| 586 | * are already correctly marked, so we just treat those as a no-op which |
| 587 | * allows us to keep the following Assert as restrictive as possible. |
| 588 | */ |
| 589 | if (InRecovery && status == TRANSACTION_STATUS_SUB_COMMITTED && |
| 590 | curval == TRANSACTION_STATUS_COMMITTED) |
| 591 | return; |
| 592 | |
| 593 | /* |
| 594 | * Current state change should be from 0 or subcommitted to target state |
| 595 | * or we should already be there when replaying changes during recovery. |
| 596 | */ |
| 597 | Assert(curval == 0 || |
| 598 | (curval == TRANSACTION_STATUS_SUB_COMMITTED && |
| 599 | status != TRANSACTION_STATUS_IN_PROGRESS) || |
| 600 | curval == status); |
| 601 | |
| 602 | /* note this assumes exclusive access to the clog page */ |
| 603 | byteval = *byteptr; |
| 604 | byteval &= ~(((1 << CLOG_BITS_PER_XACT) - 1) << bshift); |
| 605 | byteval |= (status << bshift); |
| 606 | *byteptr = byteval; |
| 607 | |
| 608 | /* |
| 609 | * Update the group LSN if the transaction completion LSN is higher. |
| 610 | * |
| 611 | * Note: lsn will be invalid when supplied during InRecovery processing, |
| 612 | * so we don't need to do anything special to avoid LSN updates during |
| 613 | * recovery. After recovery completes the next clog change will set the |
| 614 | * LSN correctly. |
| 615 | */ |
| 616 | if (!XLogRecPtrIsInvalid(lsn)) |
| 617 | { |
| 618 | int lsnindex = GetLSNIndex(slotno, xid); |
| 619 | |
| 620 | if (ClogCtl->shared->group_lsn[lsnindex] < lsn) |
| 621 | ClogCtl->shared->group_lsn[lsnindex] = lsn; |
| 622 | } |
| 623 | } |
| 624 | |
| 625 | /* |
| 626 | * Interrogate the state of a transaction in the commit log. |
| 627 | * |
| 628 | * Aside from the actual commit status, this function returns (into *lsn) |
| 629 | * an LSN that is late enough to be able to guarantee that if we flush up to |
| 630 | * that LSN then we will have flushed the transaction's commit record to disk. |
| 631 | * The result is not necessarily the exact LSN of the transaction's commit |
| 632 | * record! For example, for long-past transactions (those whose clog pages |
| 633 | * already migrated to disk), we'll return InvalidXLogRecPtr. Also, because |
| 634 | * we group transactions on the same clog page to conserve storage, we might |
| 635 | * return the LSN of a later transaction that falls into the same group. |
| 636 | * |
| 637 | * NB: this is a low-level routine and is NOT the preferred entry point |
| 638 | * for most uses; TransactionLogFetch() in transam.c is the intended caller. |
| 639 | */ |
| 640 | XidStatus |
| 641 | TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn) |
| 642 | { |
| 643 | int pageno = TransactionIdToPage(xid); |
| 644 | int byteno = TransactionIdToByte(xid); |
| 645 | int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; |
| 646 | int slotno; |
| 647 | int lsnindex; |
| 648 | char *byteptr; |
| 649 | XidStatus status; |
| 650 | |
| 651 | /* lock is acquired by SimpleLruReadPage_ReadOnly */ |
| 652 | |
| 653 | slotno = SimpleLruReadPage_ReadOnly(ClogCtl, pageno, xid); |
| 654 | byteptr = ClogCtl->shared->page_buffer[slotno] + byteno; |
| 655 | |
| 656 | status = (*byteptr >> bshift) & CLOG_XACT_BITMASK; |
| 657 | |
| 658 | lsnindex = GetLSNIndex(slotno, xid); |
| 659 | *lsn = ClogCtl->shared->group_lsn[lsnindex]; |
| 660 | |
| 661 | LWLockRelease(CLogControlLock); |
| 662 | |
| 663 | return status; |
| 664 | } |
| 665 | |
| 666 | /* |
| 667 | * Number of shared CLOG buffers. |
| 668 | * |
| 669 | * On larger multi-processor systems, it is possible to have many CLOG page |
| 670 | * requests in flight at one time which could lead to disk access for CLOG |
| 671 | * page if the required page is not found in memory. Testing revealed that we |
| 672 | * can get the best performance by having 128 CLOG buffers, more than that it |
| 673 | * doesn't improve performance. |
| 674 | * |
| 675 | * Unconditionally keeping the number of CLOG buffers to 128 did not seem like |
| 676 | * a good idea, because it would increase the minimum amount of shared memory |
| 677 | * required to start, which could be a problem for people running very small |
| 678 | * configurations. The following formula seems to represent a reasonable |
| 679 | * compromise: people with very low values for shared_buffers will get fewer |
| 680 | * CLOG buffers as well, and everyone else will get 128. |
| 681 | */ |
| 682 | Size |
| 683 | CLOGShmemBuffers(void) |
| 684 | { |
| 685 | return Min(128, Max(4, NBuffers / 512)); |
| 686 | } |
| 687 | |
| 688 | /* |
| 689 | * Initialization of shared memory for CLOG |
| 690 | */ |
| 691 | Size |
| 692 | CLOGShmemSize(void) |
| 693 | { |
| 694 | return SimpleLruShmemSize(CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE); |
| 695 | } |
| 696 | |
| 697 | void |
| 698 | CLOGShmemInit(void) |
| 699 | { |
| 700 | ClogCtl->PagePrecedes = CLOGPagePrecedes; |
| 701 | SimpleLruInit(ClogCtl, "clog" , CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE, |
| 702 | CLogControlLock, "pg_xact" , LWTRANCHE_CLOG_BUFFERS); |
| 703 | } |
| 704 | |
| 705 | /* |
| 706 | * This func must be called ONCE on system install. It creates |
| 707 | * the initial CLOG segment. (The CLOG directory is assumed to |
| 708 | * have been created by initdb, and CLOGShmemInit must have been |
| 709 | * called already.) |
| 710 | */ |
| 711 | void |
| 712 | BootStrapCLOG(void) |
| 713 | { |
| 714 | int slotno; |
| 715 | |
| 716 | LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); |
| 717 | |
| 718 | /* Create and zero the first page of the commit log */ |
| 719 | slotno = ZeroCLOGPage(0, false); |
| 720 | |
| 721 | /* Make sure it's written out */ |
| 722 | SimpleLruWritePage(ClogCtl, slotno); |
| 723 | Assert(!ClogCtl->shared->page_dirty[slotno]); |
| 724 | |
| 725 | LWLockRelease(CLogControlLock); |
| 726 | } |
| 727 | |
| 728 | /* |
| 729 | * Initialize (or reinitialize) a page of CLOG to zeroes. |
| 730 | * If writeXlog is true, also emit an XLOG record saying we did this. |
| 731 | * |
| 732 | * The page is not actually written, just set up in shared memory. |
| 733 | * The slot number of the new page is returned. |
| 734 | * |
| 735 | * Control lock must be held at entry, and will be held at exit. |
| 736 | */ |
| 737 | static int |
| 738 | ZeroCLOGPage(int pageno, bool writeXlog) |
| 739 | { |
| 740 | int slotno; |
| 741 | |
| 742 | slotno = SimpleLruZeroPage(ClogCtl, pageno); |
| 743 | |
| 744 | if (writeXlog) |
| 745 | WriteZeroPageXlogRec(pageno); |
| 746 | |
| 747 | return slotno; |
| 748 | } |
| 749 | |
| 750 | /* |
| 751 | * This must be called ONCE during postmaster or standalone-backend startup, |
| 752 | * after StartupXLOG has initialized ShmemVariableCache->nextFullXid. |
| 753 | */ |
| 754 | void |
| 755 | StartupCLOG(void) |
| 756 | { |
| 757 | TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextFullXid); |
| 758 | int pageno = TransactionIdToPage(xid); |
| 759 | |
| 760 | LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); |
| 761 | |
| 762 | /* |
| 763 | * Initialize our idea of the latest page number. |
| 764 | */ |
| 765 | ClogCtl->shared->latest_page_number = pageno; |
| 766 | |
| 767 | LWLockRelease(CLogControlLock); |
| 768 | } |
| 769 | |
| 770 | /* |
| 771 | * This must be called ONCE at the end of startup/recovery. |
| 772 | */ |
| 773 | void |
| 774 | TrimCLOG(void) |
| 775 | { |
| 776 | TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextFullXid); |
| 777 | int pageno = TransactionIdToPage(xid); |
| 778 | |
| 779 | LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); |
| 780 | |
| 781 | /* |
| 782 | * Re-Initialize our idea of the latest page number. |
| 783 | */ |
| 784 | ClogCtl->shared->latest_page_number = pageno; |
| 785 | |
| 786 | /* |
| 787 | * Zero out the remainder of the current clog page. Under normal |
| 788 | * circumstances it should be zeroes already, but it seems at least |
| 789 | * theoretically possible that XLOG replay will have settled on a nextXID |
| 790 | * value that is less than the last XID actually used and marked by the |
| 791 | * previous database lifecycle (since subtransaction commit writes clog |
| 792 | * but makes no WAL entry). Let's just be safe. (We need not worry about |
| 793 | * pages beyond the current one, since those will be zeroed when first |
| 794 | * used. For the same reason, there is no need to do anything when |
| 795 | * nextFullXid is exactly at a page boundary; and it's likely that the |
| 796 | * "current" page doesn't exist yet in that case.) |
| 797 | */ |
| 798 | if (TransactionIdToPgIndex(xid) != 0) |
| 799 | { |
| 800 | int byteno = TransactionIdToByte(xid); |
| 801 | int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; |
| 802 | int slotno; |
| 803 | char *byteptr; |
| 804 | |
| 805 | slotno = SimpleLruReadPage(ClogCtl, pageno, false, xid); |
| 806 | byteptr = ClogCtl->shared->page_buffer[slotno] + byteno; |
| 807 | |
| 808 | /* Zero so-far-unused positions in the current byte */ |
| 809 | *byteptr &= (1 << bshift) - 1; |
| 810 | /* Zero the rest of the page */ |
| 811 | MemSet(byteptr + 1, 0, BLCKSZ - byteno - 1); |
| 812 | |
| 813 | ClogCtl->shared->page_dirty[slotno] = true; |
| 814 | } |
| 815 | |
| 816 | LWLockRelease(CLogControlLock); |
| 817 | } |
| 818 | |
| 819 | /* |
| 820 | * This must be called ONCE during postmaster or standalone-backend shutdown |
| 821 | */ |
| 822 | void |
| 823 | ShutdownCLOG(void) |
| 824 | { |
| 825 | /* Flush dirty CLOG pages to disk */ |
| 826 | TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(false); |
| 827 | SimpleLruFlush(ClogCtl, false); |
| 828 | |
| 829 | /* |
| 830 | * fsync pg_xact to ensure that any files flushed previously are durably |
| 831 | * on disk. |
| 832 | */ |
| 833 | fsync_fname("pg_xact" , true); |
| 834 | |
| 835 | TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(false); |
| 836 | } |
| 837 | |
| 838 | /* |
| 839 | * Perform a checkpoint --- either during shutdown, or on-the-fly |
| 840 | */ |
| 841 | void |
| 842 | CheckPointCLOG(void) |
| 843 | { |
| 844 | /* Flush dirty CLOG pages to disk */ |
| 845 | TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true); |
| 846 | SimpleLruFlush(ClogCtl, true); |
| 847 | |
| 848 | /* |
| 849 | * fsync pg_xact to ensure that any files flushed previously are durably |
| 850 | * on disk. |
| 851 | */ |
| 852 | fsync_fname("pg_xact" , true); |
| 853 | |
| 854 | TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true); |
| 855 | } |
| 856 | |
| 857 | |
| 858 | /* |
| 859 | * Make sure that CLOG has room for a newly-allocated XID. |
| 860 | * |
| 861 | * NB: this is called while holding XidGenLock. We want it to be very fast |
| 862 | * most of the time; even when it's not so fast, no actual I/O need happen |
| 863 | * unless we're forced to write out a dirty clog or xlog page to make room |
| 864 | * in shared memory. |
| 865 | */ |
| 866 | void |
| 867 | ExtendCLOG(TransactionId newestXact) |
| 868 | { |
| 869 | int pageno; |
| 870 | |
| 871 | /* |
| 872 | * No work except at first XID of a page. But beware: just after |
| 873 | * wraparound, the first XID of page zero is FirstNormalTransactionId. |
| 874 | */ |
| 875 | if (TransactionIdToPgIndex(newestXact) != 0 && |
| 876 | !TransactionIdEquals(newestXact, FirstNormalTransactionId)) |
| 877 | return; |
| 878 | |
| 879 | pageno = TransactionIdToPage(newestXact); |
| 880 | |
| 881 | LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); |
| 882 | |
| 883 | /* Zero the page and make an XLOG entry about it */ |
| 884 | ZeroCLOGPage(pageno, true); |
| 885 | |
| 886 | LWLockRelease(CLogControlLock); |
| 887 | } |
| 888 | |
| 889 | |
| 890 | /* |
| 891 | * Remove all CLOG segments before the one holding the passed transaction ID |
| 892 | * |
| 893 | * Before removing any CLOG data, we must flush XLOG to disk, to ensure |
| 894 | * that any recently-emitted HEAP_FREEZE records have reached disk; otherwise |
| 895 | * a crash and restart might leave us with some unfrozen tuples referencing |
| 896 | * removed CLOG data. We choose to emit a special TRUNCATE XLOG record too. |
| 897 | * Replaying the deletion from XLOG is not critical, since the files could |
| 898 | * just as well be removed later, but doing so prevents a long-running hot |
| 899 | * standby server from acquiring an unreasonably bloated CLOG directory. |
| 900 | * |
| 901 | * Since CLOG segments hold a large number of transactions, the opportunity to |
| 902 | * actually remove a segment is fairly rare, and so it seems best not to do |
| 903 | * the XLOG flush unless we have confirmed that there is a removable segment. |
| 904 | */ |
| 905 | void |
| 906 | TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid) |
| 907 | { |
| 908 | int cutoffPage; |
| 909 | |
| 910 | /* |
| 911 | * The cutoff point is the start of the segment containing oldestXact. We |
| 912 | * pass the *page* containing oldestXact to SimpleLruTruncate. |
| 913 | */ |
| 914 | cutoffPage = TransactionIdToPage(oldestXact); |
| 915 | |
| 916 | /* Check to see if there's any files that could be removed */ |
| 917 | if (!SlruScanDirectory(ClogCtl, SlruScanDirCbReportPresence, &cutoffPage)) |
| 918 | return; /* nothing to remove */ |
| 919 | |
| 920 | /* |
| 921 | * Advance oldestClogXid before truncating clog, so concurrent xact status |
| 922 | * lookups can ensure they don't attempt to access truncated-away clog. |
| 923 | * |
| 924 | * It's only necessary to do this if we will actually truncate away clog |
| 925 | * pages. |
| 926 | */ |
| 927 | AdvanceOldestClogXid(oldestXact); |
| 928 | |
| 929 | /* |
| 930 | * Write XLOG record and flush XLOG to disk. We record the oldest xid |
| 931 | * we're keeping information about here so we can ensure that it's always |
| 932 | * ahead of clog truncation in case we crash, and so a standby finds out |
| 933 | * the new valid xid before the next checkpoint. |
| 934 | */ |
| 935 | WriteTruncateXlogRec(cutoffPage, oldestXact, oldestxid_datoid); |
| 936 | |
| 937 | /* Now we can remove the old CLOG segment(s) */ |
| 938 | SimpleLruTruncate(ClogCtl, cutoffPage); |
| 939 | } |
| 940 | |
| 941 | |
| 942 | /* |
| 943 | * Decide which of two CLOG page numbers is "older" for truncation purposes. |
| 944 | * |
| 945 | * We need to use comparison of TransactionIds here in order to do the right |
| 946 | * thing with wraparound XID arithmetic. However, if we are asked about |
| 947 | * page number zero, we don't want to hand InvalidTransactionId to |
| 948 | * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So, |
| 949 | * offset both xids by FirstNormalTransactionId to avoid that. |
| 950 | */ |
| 951 | static bool |
| 952 | CLOGPagePrecedes(int page1, int page2) |
| 953 | { |
| 954 | TransactionId xid1; |
| 955 | TransactionId xid2; |
| 956 | |
| 957 | xid1 = ((TransactionId) page1) * CLOG_XACTS_PER_PAGE; |
| 958 | xid1 += FirstNormalTransactionId; |
| 959 | xid2 = ((TransactionId) page2) * CLOG_XACTS_PER_PAGE; |
| 960 | xid2 += FirstNormalTransactionId; |
| 961 | |
| 962 | return TransactionIdPrecedes(xid1, xid2); |
| 963 | } |
| 964 | |
| 965 | |
| 966 | /* |
| 967 | * Write a ZEROPAGE xlog record |
| 968 | */ |
| 969 | static void |
| 970 | WriteZeroPageXlogRec(int pageno) |
| 971 | { |
| 972 | XLogBeginInsert(); |
| 973 | XLogRegisterData((char *) (&pageno), sizeof(int)); |
| 974 | (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE); |
| 975 | } |
| 976 | |
| 977 | /* |
| 978 | * Write a TRUNCATE xlog record |
| 979 | * |
| 980 | * We must flush the xlog record to disk before returning --- see notes |
| 981 | * in TruncateCLOG(). |
| 982 | */ |
| 983 | static void |
| 984 | WriteTruncateXlogRec(int pageno, TransactionId oldestXact, Oid oldestXactDb) |
| 985 | { |
| 986 | XLogRecPtr recptr; |
| 987 | xl_clog_truncate xlrec; |
| 988 | |
| 989 | xlrec.pageno = pageno; |
| 990 | xlrec.oldestXact = oldestXact; |
| 991 | xlrec.oldestXactDb = oldestXactDb; |
| 992 | |
| 993 | XLogBeginInsert(); |
| 994 | XLogRegisterData((char *) (&xlrec), sizeof(xl_clog_truncate)); |
| 995 | recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE); |
| 996 | XLogFlush(recptr); |
| 997 | } |
| 998 | |
| 999 | /* |
| 1000 | * CLOG resource manager's routines |
| 1001 | */ |
| 1002 | void |
| 1003 | clog_redo(XLogReaderState *record) |
| 1004 | { |
| 1005 | uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; |
| 1006 | |
| 1007 | /* Backup blocks are not used in clog records */ |
| 1008 | Assert(!XLogRecHasAnyBlockRefs(record)); |
| 1009 | |
| 1010 | if (info == CLOG_ZEROPAGE) |
| 1011 | { |
| 1012 | int pageno; |
| 1013 | int slotno; |
| 1014 | |
| 1015 | memcpy(&pageno, XLogRecGetData(record), sizeof(int)); |
| 1016 | |
| 1017 | LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); |
| 1018 | |
| 1019 | slotno = ZeroCLOGPage(pageno, false); |
| 1020 | SimpleLruWritePage(ClogCtl, slotno); |
| 1021 | Assert(!ClogCtl->shared->page_dirty[slotno]); |
| 1022 | |
| 1023 | LWLockRelease(CLogControlLock); |
| 1024 | } |
| 1025 | else if (info == CLOG_TRUNCATE) |
| 1026 | { |
| 1027 | xl_clog_truncate xlrec; |
| 1028 | |
| 1029 | memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_clog_truncate)); |
| 1030 | |
| 1031 | /* |
| 1032 | * During XLOG replay, latest_page_number isn't set up yet; insert a |
| 1033 | * suitable value to bypass the sanity test in SimpleLruTruncate. |
| 1034 | */ |
| 1035 | ClogCtl->shared->latest_page_number = xlrec.pageno; |
| 1036 | |
| 1037 | AdvanceOldestClogXid(xlrec.oldestXact); |
| 1038 | |
| 1039 | SimpleLruTruncate(ClogCtl, xlrec.pageno); |
| 1040 | } |
| 1041 | else |
| 1042 | elog(PANIC, "clog_redo: unknown op code %u" , info); |
| 1043 | } |
| 1044 | |