| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * storage.c |
| 4 | * code to create and destroy physical storage for relations |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * Portions Copyright (c) 1994, Regents of the University of California |
| 8 | * |
| 9 | * |
| 10 | * IDENTIFICATION |
| 11 | * src/backend/catalog/storage.c |
| 12 | * |
| 13 | * NOTES |
| 14 | * Some of this code used to be in storage/smgr/smgr.c, and the |
| 15 | * function names still reflect that. |
| 16 | * |
| 17 | *------------------------------------------------------------------------- |
| 18 | */ |
| 19 | |
| 20 | #include "postgres.h" |
| 21 | |
| 22 | #include "miscadmin.h" |
| 23 | |
| 24 | #include "access/visibilitymap.h" |
| 25 | #include "access/xact.h" |
| 26 | #include "access/xlog.h" |
| 27 | #include "access/xloginsert.h" |
| 28 | #include "access/xlogutils.h" |
| 29 | #include "catalog/storage.h" |
| 30 | #include "catalog/storage_xlog.h" |
| 31 | #include "storage/freespace.h" |
| 32 | #include "storage/smgr.h" |
| 33 | #include "utils/memutils.h" |
| 34 | #include "utils/rel.h" |
| 35 | |
| 36 | /* |
| 37 | * We keep a list of all relations (represented as RelFileNode values) |
| 38 | * that have been created or deleted in the current transaction. When |
| 39 | * a relation is created, we create the physical file immediately, but |
| 40 | * remember it so that we can delete the file again if the current |
| 41 | * transaction is aborted. Conversely, a deletion request is NOT |
| 42 | * executed immediately, but is just entered in the list. When and if |
| 43 | * the transaction commits, we can delete the physical file. |
| 44 | * |
| 45 | * To handle subtransactions, every entry is marked with its transaction |
| 46 | * nesting level. At subtransaction commit, we reassign the subtransaction's |
| 47 | * entries to the parent nesting level. At subtransaction abort, we can |
| 48 | * immediately execute the abort-time actions for all entries of the current |
| 49 | * nesting level. |
| 50 | * |
| 51 | * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear |
| 52 | * unbetimes. It'd probably be OK to keep it in TopTransactionContext, |
| 53 | * but I'm being paranoid. |
| 54 | */ |
| 55 | |
| 56 | typedef struct PendingRelDelete |
| 57 | { |
| 58 | RelFileNode relnode; /* relation that may need to be deleted */ |
| 59 | BackendId backend; /* InvalidBackendId if not a temp rel */ |
| 60 | bool atCommit; /* T=delete at commit; F=delete at abort */ |
| 61 | int nestLevel; /* xact nesting level of request */ |
| 62 | struct PendingRelDelete *next; /* linked-list link */ |
| 63 | } PendingRelDelete; |
| 64 | |
| 65 | static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */ |
| 66 | |
| 67 | /* |
| 68 | * RelationCreateStorage |
| 69 | * Create physical storage for a relation. |
| 70 | * |
| 71 | * Create the underlying disk file storage for the relation. This only |
| 72 | * creates the main fork; additional forks are created lazily by the |
| 73 | * modules that need them. |
| 74 | * |
| 75 | * This function is transactional. The creation is WAL-logged, and if the |
| 76 | * transaction aborts later on, the storage will be destroyed. |
| 77 | */ |
| 78 | SMgrRelation |
| 79 | RelationCreateStorage(RelFileNode rnode, char relpersistence) |
| 80 | { |
| 81 | PendingRelDelete *pending; |
| 82 | SMgrRelation srel; |
| 83 | BackendId backend; |
| 84 | bool needs_wal; |
| 85 | |
| 86 | switch (relpersistence) |
| 87 | { |
| 88 | case RELPERSISTENCE_TEMP: |
| 89 | backend = BackendIdForTempRelations(); |
| 90 | needs_wal = false; |
| 91 | break; |
| 92 | case RELPERSISTENCE_UNLOGGED: |
| 93 | backend = InvalidBackendId; |
| 94 | needs_wal = false; |
| 95 | break; |
| 96 | case RELPERSISTENCE_PERMANENT: |
| 97 | backend = InvalidBackendId; |
| 98 | needs_wal = true; |
| 99 | break; |
| 100 | default: |
| 101 | elog(ERROR, "invalid relpersistence: %c" , relpersistence); |
| 102 | return NULL; /* placate compiler */ |
| 103 | } |
| 104 | |
| 105 | srel = smgropen(rnode, backend); |
| 106 | smgrcreate(srel, MAIN_FORKNUM, false); |
| 107 | |
| 108 | if (needs_wal) |
| 109 | log_smgrcreate(&srel->smgr_rnode.node, MAIN_FORKNUM); |
| 110 | |
| 111 | /* Add the relation to the list of stuff to delete at abort */ |
| 112 | pending = (PendingRelDelete *) |
| 113 | MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); |
| 114 | pending->relnode = rnode; |
| 115 | pending->backend = backend; |
| 116 | pending->atCommit = false; /* delete if abort */ |
| 117 | pending->nestLevel = GetCurrentTransactionNestLevel(); |
| 118 | pending->next = pendingDeletes; |
| 119 | pendingDeletes = pending; |
| 120 | |
| 121 | return srel; |
| 122 | } |
| 123 | |
| 124 | /* |
| 125 | * Perform XLogInsert of an XLOG_SMGR_CREATE record to WAL. |
| 126 | */ |
| 127 | void |
| 128 | log_smgrcreate(const RelFileNode *rnode, ForkNumber forkNum) |
| 129 | { |
| 130 | xl_smgr_create xlrec; |
| 131 | |
| 132 | /* |
| 133 | * Make an XLOG entry reporting the file creation. |
| 134 | */ |
| 135 | xlrec.rnode = *rnode; |
| 136 | xlrec.forkNum = forkNum; |
| 137 | |
| 138 | XLogBeginInsert(); |
| 139 | XLogRegisterData((char *) &xlrec, sizeof(xlrec)); |
| 140 | XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE); |
| 141 | } |
| 142 | |
| 143 | /* |
| 144 | * RelationDropStorage |
| 145 | * Schedule unlinking of physical storage at transaction commit. |
| 146 | */ |
| 147 | void |
| 148 | RelationDropStorage(Relation rel) |
| 149 | { |
| 150 | PendingRelDelete *pending; |
| 151 | |
| 152 | /* Add the relation to the list of stuff to delete at commit */ |
| 153 | pending = (PendingRelDelete *) |
| 154 | MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); |
| 155 | pending->relnode = rel->rd_node; |
| 156 | pending->backend = rel->rd_backend; |
| 157 | pending->atCommit = true; /* delete if commit */ |
| 158 | pending->nestLevel = GetCurrentTransactionNestLevel(); |
| 159 | pending->next = pendingDeletes; |
| 160 | pendingDeletes = pending; |
| 161 | |
| 162 | /* |
| 163 | * NOTE: if the relation was created in this transaction, it will now be |
| 164 | * present in the pending-delete list twice, once with atCommit true and |
| 165 | * once with atCommit false. Hence, it will be physically deleted at end |
| 166 | * of xact in either case (and the other entry will be ignored by |
| 167 | * smgrDoPendingDeletes, so no error will occur). We could instead remove |
| 168 | * the existing list entry and delete the physical file immediately, but |
| 169 | * for now I'll keep the logic simple. |
| 170 | */ |
| 171 | |
| 172 | RelationCloseSmgr(rel); |
| 173 | } |
| 174 | |
| 175 | /* |
| 176 | * RelationPreserveStorage |
| 177 | * Mark a relation as not to be deleted after all. |
| 178 | * |
| 179 | * We need this function because relation mapping changes are committed |
| 180 | * separately from commit of the whole transaction, so it's still possible |
| 181 | * for the transaction to abort after the mapping update is done. |
| 182 | * When a new physical relation is installed in the map, it would be |
| 183 | * scheduled for delete-on-abort, so we'd delete it, and be in trouble. |
| 184 | * The relation mapper fixes this by telling us to not delete such relations |
| 185 | * after all as part of its commit. |
| 186 | * |
| 187 | * We also use this to reuse an old build of an index during ALTER TABLE, this |
| 188 | * time removing the delete-at-commit entry. |
| 189 | * |
| 190 | * No-op if the relation is not among those scheduled for deletion. |
| 191 | */ |
| 192 | void |
| 193 | RelationPreserveStorage(RelFileNode rnode, bool atCommit) |
| 194 | { |
| 195 | PendingRelDelete *pending; |
| 196 | PendingRelDelete *prev; |
| 197 | PendingRelDelete *next; |
| 198 | |
| 199 | prev = NULL; |
| 200 | for (pending = pendingDeletes; pending != NULL; pending = next) |
| 201 | { |
| 202 | next = pending->next; |
| 203 | if (RelFileNodeEquals(rnode, pending->relnode) |
| 204 | && pending->atCommit == atCommit) |
| 205 | { |
| 206 | /* unlink and delete list entry */ |
| 207 | if (prev) |
| 208 | prev->next = next; |
| 209 | else |
| 210 | pendingDeletes = next; |
| 211 | pfree(pending); |
| 212 | /* prev does not change */ |
| 213 | } |
| 214 | else |
| 215 | { |
| 216 | /* unrelated entry, don't touch it */ |
| 217 | prev = pending; |
| 218 | } |
| 219 | } |
| 220 | } |
| 221 | |
| 222 | /* |
| 223 | * RelationTruncate |
| 224 | * Physically truncate a relation to the specified number of blocks. |
| 225 | * |
| 226 | * This includes getting rid of any buffers for the blocks that are to be |
| 227 | * dropped. |
| 228 | */ |
| 229 | void |
| 230 | RelationTruncate(Relation rel, BlockNumber nblocks) |
| 231 | { |
| 232 | bool fsm; |
| 233 | bool vm; |
| 234 | |
| 235 | /* Open it at the smgr level if not already done */ |
| 236 | RelationOpenSmgr(rel); |
| 237 | |
| 238 | /* |
| 239 | * Make sure smgr_targblock etc aren't pointing somewhere past new end |
| 240 | */ |
| 241 | rel->rd_smgr->smgr_targblock = InvalidBlockNumber; |
| 242 | rel->rd_smgr->smgr_fsm_nblocks = InvalidBlockNumber; |
| 243 | rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber; |
| 244 | |
| 245 | /* Truncate the FSM first if it exists */ |
| 246 | fsm = smgrexists(rel->rd_smgr, FSM_FORKNUM); |
| 247 | if (fsm) |
| 248 | FreeSpaceMapTruncateRel(rel, nblocks); |
| 249 | |
| 250 | /* Truncate the visibility map too if it exists. */ |
| 251 | vm = smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM); |
| 252 | if (vm) |
| 253 | visibilitymap_truncate(rel, nblocks); |
| 254 | |
| 255 | /* |
| 256 | * We WAL-log the truncation before actually truncating, which means |
| 257 | * trouble if the truncation fails. If we then crash, the WAL replay |
| 258 | * likely isn't going to succeed in the truncation either, and cause a |
| 259 | * PANIC. It's tempting to put a critical section here, but that cure |
| 260 | * would be worse than the disease. It would turn a usually harmless |
| 261 | * failure to truncate, that might spell trouble at WAL replay, into a |
| 262 | * certain PANIC. |
| 263 | */ |
| 264 | if (RelationNeedsWAL(rel)) |
| 265 | { |
| 266 | /* |
| 267 | * Make an XLOG entry reporting the file truncation. |
| 268 | */ |
| 269 | XLogRecPtr lsn; |
| 270 | xl_smgr_truncate xlrec; |
| 271 | |
| 272 | xlrec.blkno = nblocks; |
| 273 | xlrec.rnode = rel->rd_node; |
| 274 | xlrec.flags = SMGR_TRUNCATE_ALL; |
| 275 | |
| 276 | XLogBeginInsert(); |
| 277 | XLogRegisterData((char *) &xlrec, sizeof(xlrec)); |
| 278 | |
| 279 | lsn = XLogInsert(RM_SMGR_ID, |
| 280 | XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE); |
| 281 | |
| 282 | /* |
| 283 | * Flush, because otherwise the truncation of the main relation might |
| 284 | * hit the disk before the WAL record, and the truncation of the FSM |
| 285 | * or visibility map. If we crashed during that window, we'd be left |
| 286 | * with a truncated heap, but the FSM or visibility map would still |
| 287 | * contain entries for the non-existent heap pages. |
| 288 | */ |
| 289 | if (fsm || vm) |
| 290 | XLogFlush(lsn); |
| 291 | } |
| 292 | |
| 293 | /* Do the real work */ |
| 294 | smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks); |
| 295 | } |
| 296 | |
| 297 | /* |
| 298 | * Copy a fork's data, block by block. |
| 299 | * |
| 300 | * Note that this requires that there is no dirty data in shared buffers. If |
| 301 | * it's possible that there are, callers need to flush those using |
| 302 | * e.g. FlushRelationBuffers(rel). |
| 303 | */ |
| 304 | void |
| 305 | RelationCopyStorage(SMgrRelation src, SMgrRelation dst, |
| 306 | ForkNumber forkNum, char relpersistence) |
| 307 | { |
| 308 | PGAlignedBlock buf; |
| 309 | Page page; |
| 310 | bool use_wal; |
| 311 | bool copying_initfork; |
| 312 | BlockNumber nblocks; |
| 313 | BlockNumber blkno; |
| 314 | |
| 315 | page = (Page) buf.data; |
| 316 | |
| 317 | /* |
| 318 | * The init fork for an unlogged relation in many respects has to be |
| 319 | * treated the same as normal relation, changes need to be WAL logged and |
| 320 | * it needs to be synced to disk. |
| 321 | */ |
| 322 | copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED && |
| 323 | forkNum == INIT_FORKNUM; |
| 324 | |
| 325 | /* |
| 326 | * We need to log the copied data in WAL iff WAL archiving/streaming is |
| 327 | * enabled AND it's a permanent relation. |
| 328 | */ |
| 329 | use_wal = XLogIsNeeded() && |
| 330 | (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork); |
| 331 | |
| 332 | nblocks = smgrnblocks(src, forkNum); |
| 333 | |
| 334 | for (blkno = 0; blkno < nblocks; blkno++) |
| 335 | { |
| 336 | /* If we got a cancel signal during the copy of the data, quit */ |
| 337 | CHECK_FOR_INTERRUPTS(); |
| 338 | |
| 339 | smgrread(src, forkNum, blkno, buf.data); |
| 340 | |
| 341 | if (!PageIsVerified(page, blkno)) |
| 342 | ereport(ERROR, |
| 343 | (errcode(ERRCODE_DATA_CORRUPTED), |
| 344 | errmsg("invalid page in block %u of relation %s" , |
| 345 | blkno, |
| 346 | relpathbackend(src->smgr_rnode.node, |
| 347 | src->smgr_rnode.backend, |
| 348 | forkNum)))); |
| 349 | |
| 350 | /* |
| 351 | * WAL-log the copied page. Unfortunately we don't know what kind of a |
| 352 | * page this is, so we have to log the full page including any unused |
| 353 | * space. |
| 354 | */ |
| 355 | if (use_wal) |
| 356 | log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false); |
| 357 | |
| 358 | PageSetChecksumInplace(page, blkno); |
| 359 | |
| 360 | /* |
| 361 | * Now write the page. We say isTemp = true even if it's not a temp |
| 362 | * rel, because there's no need for smgr to schedule an fsync for this |
| 363 | * write; we'll do it ourselves below. |
| 364 | */ |
| 365 | smgrextend(dst, forkNum, blkno, buf.data, true); |
| 366 | } |
| 367 | |
| 368 | /* |
| 369 | * If the rel is WAL-logged, must fsync before commit. We use heap_sync |
| 370 | * to ensure that the toast table gets fsync'd too. (For a temp or |
| 371 | * unlogged rel we don't care since the data will be gone after a crash |
| 372 | * anyway.) |
| 373 | * |
| 374 | * It's obvious that we must do this when not WAL-logging the copy. It's |
| 375 | * less obvious that we have to do it even if we did WAL-log the copied |
| 376 | * pages. The reason is that since we're copying outside shared buffers, a |
| 377 | * CHECKPOINT occurring during the copy has no way to flush the previously |
| 378 | * written data to disk (indeed it won't know the new rel even exists). A |
| 379 | * crash later on would replay WAL from the checkpoint, therefore it |
| 380 | * wouldn't replay our earlier WAL entries. If we do not fsync those pages |
| 381 | * here, they might still not be on disk when the crash occurs. |
| 382 | */ |
| 383 | if (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork) |
| 384 | smgrimmedsync(dst, forkNum); |
| 385 | } |
| 386 | |
| 387 | /* |
| 388 | * smgrDoPendingDeletes() -- Take care of relation deletes at end of xact. |
| 389 | * |
| 390 | * This also runs when aborting a subxact; we want to clean up a failed |
| 391 | * subxact immediately. |
| 392 | * |
| 393 | * Note: It's possible that we're being asked to remove a relation that has |
| 394 | * no physical storage in any fork. In particular, it's possible that we're |
| 395 | * cleaning up an old temporary relation for which RemovePgTempFiles has |
| 396 | * already recovered the physical storage. |
| 397 | */ |
| 398 | void |
| 399 | smgrDoPendingDeletes(bool isCommit) |
| 400 | { |
| 401 | int nestLevel = GetCurrentTransactionNestLevel(); |
| 402 | PendingRelDelete *pending; |
| 403 | PendingRelDelete *prev; |
| 404 | PendingRelDelete *next; |
| 405 | int nrels = 0, |
| 406 | i = 0, |
| 407 | maxrels = 0; |
| 408 | SMgrRelation *srels = NULL; |
| 409 | |
| 410 | prev = NULL; |
| 411 | for (pending = pendingDeletes; pending != NULL; pending = next) |
| 412 | { |
| 413 | next = pending->next; |
| 414 | if (pending->nestLevel < nestLevel) |
| 415 | { |
| 416 | /* outer-level entries should not be processed yet */ |
| 417 | prev = pending; |
| 418 | } |
| 419 | else |
| 420 | { |
| 421 | /* unlink list entry first, so we don't retry on failure */ |
| 422 | if (prev) |
| 423 | prev->next = next; |
| 424 | else |
| 425 | pendingDeletes = next; |
| 426 | /* do deletion if called for */ |
| 427 | if (pending->atCommit == isCommit) |
| 428 | { |
| 429 | SMgrRelation srel; |
| 430 | |
| 431 | srel = smgropen(pending->relnode, pending->backend); |
| 432 | |
| 433 | /* allocate the initial array, or extend it, if needed */ |
| 434 | if (maxrels == 0) |
| 435 | { |
| 436 | maxrels = 8; |
| 437 | srels = palloc(sizeof(SMgrRelation) * maxrels); |
| 438 | } |
| 439 | else if (maxrels <= nrels) |
| 440 | { |
| 441 | maxrels *= 2; |
| 442 | srels = repalloc(srels, sizeof(SMgrRelation) * maxrels); |
| 443 | } |
| 444 | |
| 445 | srels[nrels++] = srel; |
| 446 | } |
| 447 | /* must explicitly free the list entry */ |
| 448 | pfree(pending); |
| 449 | /* prev does not change */ |
| 450 | } |
| 451 | } |
| 452 | |
| 453 | if (nrels > 0) |
| 454 | { |
| 455 | smgrdounlinkall(srels, nrels, false); |
| 456 | |
| 457 | for (i = 0; i < nrels; i++) |
| 458 | smgrclose(srels[i]); |
| 459 | |
| 460 | pfree(srels); |
| 461 | } |
| 462 | } |
| 463 | |
| 464 | /* |
| 465 | * smgrGetPendingDeletes() -- Get a list of non-temp relations to be deleted. |
| 466 | * |
| 467 | * The return value is the number of relations scheduled for termination. |
| 468 | * *ptr is set to point to a freshly-palloc'd array of RelFileNodes. |
| 469 | * If there are no relations to be deleted, *ptr is set to NULL. |
| 470 | * |
| 471 | * Only non-temporary relations are included in the returned list. This is OK |
| 472 | * because the list is used only in contexts where temporary relations don't |
| 473 | * matter: we're either writing to the two-phase state file (and transactions |
| 474 | * that have touched temp tables can't be prepared) or we're writing to xlog |
| 475 | * (and all temporary files will be zapped if we restart anyway, so no need |
| 476 | * for redo to do it also). |
| 477 | * |
| 478 | * Note that the list does not include anything scheduled for termination |
| 479 | * by upper-level transactions. |
| 480 | */ |
| 481 | int |
| 482 | smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr) |
| 483 | { |
| 484 | int nestLevel = GetCurrentTransactionNestLevel(); |
| 485 | int nrels; |
| 486 | RelFileNode *rptr; |
| 487 | PendingRelDelete *pending; |
| 488 | |
| 489 | nrels = 0; |
| 490 | for (pending = pendingDeletes; pending != NULL; pending = pending->next) |
| 491 | { |
| 492 | if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit |
| 493 | && pending->backend == InvalidBackendId) |
| 494 | nrels++; |
| 495 | } |
| 496 | if (nrels == 0) |
| 497 | { |
| 498 | *ptr = NULL; |
| 499 | return 0; |
| 500 | } |
| 501 | rptr = (RelFileNode *) palloc(nrels * sizeof(RelFileNode)); |
| 502 | *ptr = rptr; |
| 503 | for (pending = pendingDeletes; pending != NULL; pending = pending->next) |
| 504 | { |
| 505 | if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit |
| 506 | && pending->backend == InvalidBackendId) |
| 507 | { |
| 508 | *rptr = pending->relnode; |
| 509 | rptr++; |
| 510 | } |
| 511 | } |
| 512 | return nrels; |
| 513 | } |
| 514 | |
| 515 | /* |
| 516 | * PostPrepare_smgr -- Clean up after a successful PREPARE |
| 517 | * |
| 518 | * What we have to do here is throw away the in-memory state about pending |
| 519 | * relation deletes. It's all been recorded in the 2PC state file and |
| 520 | * it's no longer smgr's job to worry about it. |
| 521 | */ |
| 522 | void |
| 523 | PostPrepare_smgr(void) |
| 524 | { |
| 525 | PendingRelDelete *pending; |
| 526 | PendingRelDelete *next; |
| 527 | |
| 528 | for (pending = pendingDeletes; pending != NULL; pending = next) |
| 529 | { |
| 530 | next = pending->next; |
| 531 | pendingDeletes = next; |
| 532 | /* must explicitly free the list entry */ |
| 533 | pfree(pending); |
| 534 | } |
| 535 | } |
| 536 | |
| 537 | |
| 538 | /* |
| 539 | * AtSubCommit_smgr() --- Take care of subtransaction commit. |
| 540 | * |
| 541 | * Reassign all items in the pending-deletes list to the parent transaction. |
| 542 | */ |
| 543 | void |
| 544 | AtSubCommit_smgr(void) |
| 545 | { |
| 546 | int nestLevel = GetCurrentTransactionNestLevel(); |
| 547 | PendingRelDelete *pending; |
| 548 | |
| 549 | for (pending = pendingDeletes; pending != NULL; pending = pending->next) |
| 550 | { |
| 551 | if (pending->nestLevel >= nestLevel) |
| 552 | pending->nestLevel = nestLevel - 1; |
| 553 | } |
| 554 | } |
| 555 | |
/*
 * AtSubAbort_smgr() --- Take care of subtransaction abort.
 *
 * Runs the abort-time processing of the pending-deletes list: relations
 * created by the subtransaction are deleted and its deletion requests are
 * forgotten.  This can be done right away, since the subtransaction is known
 * not to commit.
 */
void
AtSubAbort_smgr(void)
{
	/* false = process the list as for an abort */
	smgrDoPendingDeletes(false);
}
| 568 | |
| 569 | void |
| 570 | smgr_redo(XLogReaderState *record) |
| 571 | { |
| 572 | XLogRecPtr lsn = record->EndRecPtr; |
| 573 | uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; |
| 574 | |
| 575 | /* Backup blocks are not used in smgr records */ |
| 576 | Assert(!XLogRecHasAnyBlockRefs(record)); |
| 577 | |
| 578 | if (info == XLOG_SMGR_CREATE) |
| 579 | { |
| 580 | xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record); |
| 581 | SMgrRelation reln; |
| 582 | |
| 583 | reln = smgropen(xlrec->rnode, InvalidBackendId); |
| 584 | smgrcreate(reln, xlrec->forkNum, true); |
| 585 | } |
| 586 | else if (info == XLOG_SMGR_TRUNCATE) |
| 587 | { |
| 588 | xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record); |
| 589 | SMgrRelation reln; |
| 590 | Relation rel; |
| 591 | |
| 592 | reln = smgropen(xlrec->rnode, InvalidBackendId); |
| 593 | |
| 594 | /* |
| 595 | * Forcibly create relation if it doesn't exist (which suggests that |
| 596 | * it was dropped somewhere later in the WAL sequence). As in |
| 597 | * XLogReadBufferForRedo, we prefer to recreate the rel and replay the |
| 598 | * log as best we can until the drop is seen. |
| 599 | */ |
| 600 | smgrcreate(reln, MAIN_FORKNUM, true); |
| 601 | |
| 602 | /* |
| 603 | * Before we perform the truncation, update minimum recovery point to |
| 604 | * cover this WAL record. Once the relation is truncated, there's no |
| 605 | * going back. The buffer manager enforces the WAL-first rule for |
| 606 | * normal updates to relation files, so that the minimum recovery |
| 607 | * point is always updated before the corresponding change in the data |
| 608 | * file is flushed to disk. We have to do the same manually here. |
| 609 | * |
| 610 | * Doing this before the truncation means that if the truncation fails |
| 611 | * for some reason, you cannot start up the system even after restart, |
| 612 | * until you fix the underlying situation so that the truncation will |
| 613 | * succeed. Alternatively, we could update the minimum recovery point |
| 614 | * after truncation, but that would leave a small window where the |
| 615 | * WAL-first rule could be violated. |
| 616 | */ |
| 617 | XLogFlush(lsn); |
| 618 | |
| 619 | if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0) |
| 620 | { |
| 621 | smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno); |
| 622 | |
| 623 | /* Also tell xlogutils.c about it */ |
| 624 | XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno); |
| 625 | } |
| 626 | |
| 627 | /* Truncate FSM and VM too */ |
| 628 | rel = CreateFakeRelcacheEntry(xlrec->rnode); |
| 629 | |
| 630 | if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 && |
| 631 | smgrexists(reln, FSM_FORKNUM)) |
| 632 | FreeSpaceMapTruncateRel(rel, xlrec->blkno); |
| 633 | if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 && |
| 634 | smgrexists(reln, VISIBILITYMAP_FORKNUM)) |
| 635 | visibilitymap_truncate(rel, xlrec->blkno); |
| 636 | |
| 637 | FreeFakeRelcacheEntry(rel); |
| 638 | } |
| 639 | else |
| 640 | elog(PANIC, "smgr_redo: unknown op code %u" , info); |
| 641 | } |
| 642 | |