| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * smgr.c |
| 4 | * public interface routines to storage manager switch. |
| 5 | * |
| 6 | * All file system operations in POSTGRES dispatch through these |
| 7 | * routines. |
| 8 | * |
| 9 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 10 | * Portions Copyright (c) 1994, Regents of the University of California |
| 11 | * |
| 12 | * |
| 13 | * IDENTIFICATION |
| 14 | * src/backend/storage/smgr/smgr.c |
| 15 | * |
| 16 | *------------------------------------------------------------------------- |
| 17 | */ |
| 18 | #include "postgres.h" |
| 19 | |
| 20 | #include "commands/tablespace.h" |
| 21 | #include "lib/ilist.h" |
| 22 | #include "storage/bufmgr.h" |
| 23 | #include "storage/ipc.h" |
| 24 | #include "storage/md.h" |
| 25 | #include "storage/smgr.h" |
| 26 | #include "utils/hsearch.h" |
| 27 | #include "utils/inval.h" |
| 28 | |
| 29 | |
| 30 | /* |
| 31 | * This struct of function pointers defines the API between smgr.c and |
| 32 | * any individual storage manager module. Note that smgr subfunctions are |
| 33 | * generally expected to report problems via elog(ERROR). An exception is |
| 34 | * that smgr_unlink should use elog(WARNING), rather than erroring out, |
| 35 | * because we normally unlink relations during post-commit/abort cleanup, |
| 36 | * and so it's too late to raise an error. Also, various conditions that |
| 37 | * would normally be errors should be allowed during bootstrap and/or WAL |
| 38 | * recovery --- see comments in md.c for details. |
| 39 | */ |
| 40 | typedef struct f_smgr |
| 41 | { |
| 42 | void (*smgr_init) (void); /* may be NULL */ |
| 43 | void (*smgr_shutdown) (void); /* may be NULL */ |
| 44 | void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); |
| 45 | void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, |
| 46 | bool isRedo); |
| 47 | bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); |
| 48 | void (*smgr_unlink) (RelFileNodeBackend rnode, ForkNumber forknum, |
| 49 | bool isRedo); |
| 50 | void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, |
| 51 | BlockNumber blocknum, char *buffer, bool skipFsync); |
| 52 | void (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, |
| 53 | BlockNumber blocknum); |
| 54 | void (*smgr_read) (SMgrRelation reln, ForkNumber forknum, |
| 55 | BlockNumber blocknum, char *buffer); |
| 56 | void (*smgr_write) (SMgrRelation reln, ForkNumber forknum, |
| 57 | BlockNumber blocknum, char *buffer, bool skipFsync); |
| 58 | void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, |
| 59 | BlockNumber blocknum, BlockNumber nblocks); |
| 60 | BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); |
| 61 | void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, |
| 62 | BlockNumber nblocks); |
| 63 | void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); |
| 64 | } f_smgr; |
| 65 | |
| 66 | static const f_smgr smgrsw[] = { |
| 67 | /* magnetic disk */ |
| 68 | { |
| 69 | .smgr_init = mdinit, |
| 70 | .smgr_shutdown = NULL, |
| 71 | .smgr_close = mdclose, |
| 72 | .smgr_create = mdcreate, |
| 73 | .smgr_exists = mdexists, |
| 74 | .smgr_unlink = mdunlink, |
| 75 | .smgr_extend = mdextend, |
| 76 | .smgr_prefetch = mdprefetch, |
| 77 | .smgr_read = mdread, |
| 78 | .smgr_write = mdwrite, |
| 79 | .smgr_writeback = mdwriteback, |
| 80 | .smgr_nblocks = mdnblocks, |
| 81 | .smgr_truncate = mdtruncate, |
| 82 | .smgr_immedsync = mdimmedsync, |
| 83 | } |
| 84 | }; |
| 85 | |
| 86 | static const int NSmgr = lengthof(smgrsw); |
| 87 | |
| 88 | /* |
| 89 | * Each backend has a hashtable that stores all extant SMgrRelation objects. |
| 90 | * In addition, "unowned" SMgrRelation objects are chained together in a list. |
| 91 | */ |
| 92 | static HTAB *SMgrRelationHash = NULL; |
| 93 | |
| 94 | static dlist_head unowned_relns; |
| 95 | |
| 96 | /* local function prototypes */ |
| 97 | static void smgrshutdown(int code, Datum arg); |
| 98 | |
| 99 | |
| 100 | /* |
| 101 | * smgrinit(), smgrshutdown() -- Initialize or shut down storage |
| 102 | * managers. |
| 103 | * |
| 104 | * Note: smgrinit is called during backend startup (normal or standalone |
| 105 | * case), *not* during postmaster start. Therefore, any resources created |
| 106 | * here or destroyed in smgrshutdown are backend-local. |
| 107 | */ |
| 108 | void |
| 109 | smgrinit(void) |
| 110 | { |
| 111 | int i; |
| 112 | |
| 113 | for (i = 0; i < NSmgr; i++) |
| 114 | { |
| 115 | if (smgrsw[i].smgr_init) |
| 116 | smgrsw[i].smgr_init(); |
| 117 | } |
| 118 | |
| 119 | /* register the shutdown proc */ |
| 120 | on_proc_exit(smgrshutdown, 0); |
| 121 | } |
| 122 | |
| 123 | /* |
| 124 | * on_proc_exit hook for smgr cleanup during backend shutdown |
| 125 | */ |
| 126 | static void |
| 127 | smgrshutdown(int code, Datum arg) |
| 128 | { |
| 129 | int i; |
| 130 | |
| 131 | for (i = 0; i < NSmgr; i++) |
| 132 | { |
| 133 | if (smgrsw[i].smgr_shutdown) |
| 134 | smgrsw[i].smgr_shutdown(); |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | /* |
| 139 | * smgropen() -- Return an SMgrRelation object, creating it if need be. |
| 140 | * |
| 141 | * This does not attempt to actually open the underlying file. |
| 142 | */ |
| 143 | SMgrRelation |
| 144 | smgropen(RelFileNode rnode, BackendId backend) |
| 145 | { |
| 146 | RelFileNodeBackend brnode; |
| 147 | SMgrRelation reln; |
| 148 | bool found; |
| 149 | |
| 150 | if (SMgrRelationHash == NULL) |
| 151 | { |
| 152 | /* First time through: initialize the hash table */ |
| 153 | HASHCTL ctl; |
| 154 | |
| 155 | MemSet(&ctl, 0, sizeof(ctl)); |
| 156 | ctl.keysize = sizeof(RelFileNodeBackend); |
| 157 | ctl.entrysize = sizeof(SMgrRelationData); |
| 158 | SMgrRelationHash = hash_create("smgr relation table" , 400, |
| 159 | &ctl, HASH_ELEM | HASH_BLOBS); |
| 160 | dlist_init(&unowned_relns); |
| 161 | } |
| 162 | |
| 163 | /* Look up or create an entry */ |
| 164 | brnode.node = rnode; |
| 165 | brnode.backend = backend; |
| 166 | reln = (SMgrRelation) hash_search(SMgrRelationHash, |
| 167 | (void *) &brnode, |
| 168 | HASH_ENTER, &found); |
| 169 | |
| 170 | /* Initialize it if not present before */ |
| 171 | if (!found) |
| 172 | { |
| 173 | int forknum; |
| 174 | |
| 175 | /* hash_search already filled in the lookup key */ |
| 176 | reln->smgr_owner = NULL; |
| 177 | reln->smgr_targblock = InvalidBlockNumber; |
| 178 | reln->smgr_fsm_nblocks = InvalidBlockNumber; |
| 179 | reln->smgr_vm_nblocks = InvalidBlockNumber; |
| 180 | reln->smgr_which = 0; /* we only have md.c at present */ |
| 181 | |
| 182 | /* mark it not open */ |
| 183 | for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) |
| 184 | reln->md_num_open_segs[forknum] = 0; |
| 185 | |
| 186 | /* it has no owner yet */ |
| 187 | dlist_push_tail(&unowned_relns, &reln->node); |
| 188 | } |
| 189 | |
| 190 | return reln; |
| 191 | } |
| 192 | |
| 193 | /* |
| 194 | * smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object |
| 195 | * |
| 196 | * There can be only one owner at a time; this is sufficient since currently |
| 197 | * the only such owners exist in the relcache. |
| 198 | */ |
| 199 | void |
| 200 | smgrsetowner(SMgrRelation *owner, SMgrRelation reln) |
| 201 | { |
| 202 | /* We don't support "disowning" an SMgrRelation here, use smgrclearowner */ |
| 203 | Assert(owner != NULL); |
| 204 | |
| 205 | /* |
| 206 | * First, unhook any old owner. (Normally there shouldn't be any, but it |
| 207 | * seems possible that this can happen during swap_relation_files() |
| 208 | * depending on the order of processing. It's ok to close the old |
| 209 | * relcache entry early in that case.) |
| 210 | * |
| 211 | * If there isn't an old owner, then the reln should be in the unowned |
| 212 | * list, and we need to remove it. |
| 213 | */ |
| 214 | if (reln->smgr_owner) |
| 215 | *(reln->smgr_owner) = NULL; |
| 216 | else |
| 217 | dlist_delete(&reln->node); |
| 218 | |
| 219 | /* Now establish the ownership relationship. */ |
| 220 | reln->smgr_owner = owner; |
| 221 | *owner = reln; |
| 222 | } |
| 223 | |
| 224 | /* |
| 225 | * smgrclearowner() -- Remove long-lived reference to an SMgrRelation object |
| 226 | * if one exists |
| 227 | */ |
| 228 | void |
| 229 | smgrclearowner(SMgrRelation *owner, SMgrRelation reln) |
| 230 | { |
| 231 | /* Do nothing if the SMgrRelation object is not owned by the owner */ |
| 232 | if (reln->smgr_owner != owner) |
| 233 | return; |
| 234 | |
| 235 | /* unset the owner's reference */ |
| 236 | *owner = NULL; |
| 237 | |
| 238 | /* unset our reference to the owner */ |
| 239 | reln->smgr_owner = NULL; |
| 240 | |
| 241 | /* add to list of unowned relations */ |
| 242 | dlist_push_tail(&unowned_relns, &reln->node); |
| 243 | } |
| 244 | |
| 245 | /* |
| 246 | * smgrexists() -- Does the underlying file for a fork exist? |
| 247 | */ |
| 248 | bool |
| 249 | smgrexists(SMgrRelation reln, ForkNumber forknum) |
| 250 | { |
| 251 | return smgrsw[reln->smgr_which].smgr_exists(reln, forknum); |
| 252 | } |
| 253 | |
| 254 | /* |
| 255 | * smgrclose() -- Close and delete an SMgrRelation object. |
| 256 | */ |
| 257 | void |
| 258 | smgrclose(SMgrRelation reln) |
| 259 | { |
| 260 | SMgrRelation *owner; |
| 261 | ForkNumber forknum; |
| 262 | |
| 263 | for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) |
| 264 | smgrsw[reln->smgr_which].smgr_close(reln, forknum); |
| 265 | |
| 266 | owner = reln->smgr_owner; |
| 267 | |
| 268 | if (!owner) |
| 269 | dlist_delete(&reln->node); |
| 270 | |
| 271 | if (hash_search(SMgrRelationHash, |
| 272 | (void *) &(reln->smgr_rnode), |
| 273 | HASH_REMOVE, NULL) == NULL) |
| 274 | elog(ERROR, "SMgrRelation hashtable corrupted" ); |
| 275 | |
| 276 | /* |
| 277 | * Unhook the owner pointer, if any. We do this last since in the remote |
| 278 | * possibility of failure above, the SMgrRelation object will still exist. |
| 279 | */ |
| 280 | if (owner) |
| 281 | *owner = NULL; |
| 282 | } |
| 283 | |
| 284 | /* |
| 285 | * smgrcloseall() -- Close all existing SMgrRelation objects. |
| 286 | */ |
| 287 | void |
| 288 | smgrcloseall(void) |
| 289 | { |
| 290 | HASH_SEQ_STATUS status; |
| 291 | SMgrRelation reln; |
| 292 | |
| 293 | /* Nothing to do if hashtable not set up */ |
| 294 | if (SMgrRelationHash == NULL) |
| 295 | return; |
| 296 | |
| 297 | hash_seq_init(&status, SMgrRelationHash); |
| 298 | |
| 299 | while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL) |
| 300 | smgrclose(reln); |
| 301 | } |
| 302 | |
| 303 | /* |
| 304 | * smgrclosenode() -- Close SMgrRelation object for given RelFileNode, |
| 305 | * if one exists. |
| 306 | * |
| 307 | * This has the same effects as smgrclose(smgropen(rnode)), but it avoids |
| 308 | * uselessly creating a hashtable entry only to drop it again when no |
| 309 | * such entry exists already. |
| 310 | */ |
| 311 | void |
| 312 | smgrclosenode(RelFileNodeBackend rnode) |
| 313 | { |
| 314 | SMgrRelation reln; |
| 315 | |
| 316 | /* Nothing to do if hashtable not set up */ |
| 317 | if (SMgrRelationHash == NULL) |
| 318 | return; |
| 319 | |
| 320 | reln = (SMgrRelation) hash_search(SMgrRelationHash, |
| 321 | (void *) &rnode, |
| 322 | HASH_FIND, NULL); |
| 323 | if (reln != NULL) |
| 324 | smgrclose(reln); |
| 325 | } |
| 326 | |
| 327 | /* |
| 328 | * smgrcreate() -- Create a new relation. |
| 329 | * |
| 330 | * Given an already-created (but presumably unused) SMgrRelation, |
| 331 | * cause the underlying disk file or other storage for the fork |
| 332 | * to be created. |
| 333 | * |
| 334 | * If isRedo is true, it is okay for the underlying file to exist |
| 335 | * already because we are in a WAL replay sequence. |
| 336 | */ |
| 337 | void |
| 338 | smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) |
| 339 | { |
| 340 | /* |
| 341 | * Exit quickly in WAL replay mode if we've already opened the file. If |
| 342 | * it's open, it surely must exist. |
| 343 | */ |
| 344 | if (isRedo && reln->md_num_open_segs[forknum] > 0) |
| 345 | return; |
| 346 | |
| 347 | /* |
| 348 | * We may be using the target table space for the first time in this |
| 349 | * database, so create a per-database subdirectory if needed. |
| 350 | * |
| 351 | * XXX this is a fairly ugly violation of module layering, but this seems |
| 352 | * to be the best place to put the check. Maybe TablespaceCreateDbspace |
| 353 | * should be here and not in commands/tablespace.c? But that would imply |
| 354 | * importing a lot of stuff that smgr.c oughtn't know, either. |
| 355 | */ |
| 356 | TablespaceCreateDbspace(reln->smgr_rnode.node.spcNode, |
| 357 | reln->smgr_rnode.node.dbNode, |
| 358 | isRedo); |
| 359 | |
| 360 | smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo); |
| 361 | } |
| 362 | |
| 363 | /* |
| 364 | * smgrdounlink() -- Immediately unlink all forks of a relation. |
| 365 | * |
| 366 | * All forks of the relation are removed from the store. This should |
| 367 | * not be used during transactional operations, since it can't be undone. |
| 368 | * |
| 369 | * If isRedo is true, it is okay for the underlying file(s) to be gone |
| 370 | * already. |
| 371 | * |
| 372 | * This is equivalent to calling smgrdounlinkfork for each fork, but |
| 373 | * it's significantly quicker so should be preferred when possible. |
| 374 | */ |
| 375 | void |
| 376 | smgrdounlink(SMgrRelation reln, bool isRedo) |
| 377 | { |
| 378 | RelFileNodeBackend rnode = reln->smgr_rnode; |
| 379 | int which = reln->smgr_which; |
| 380 | ForkNumber forknum; |
| 381 | |
| 382 | /* Close the forks at smgr level */ |
| 383 | for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) |
| 384 | smgrsw[which].smgr_close(reln, forknum); |
| 385 | |
| 386 | /* |
| 387 | * Get rid of any remaining buffers for the relation. bufmgr will just |
| 388 | * drop them without bothering to write the contents. |
| 389 | */ |
| 390 | DropRelFileNodesAllBuffers(&rnode, 1); |
| 391 | |
| 392 | /* |
| 393 | * It'd be nice to tell the stats collector to forget it immediately, too. |
| 394 | * But we can't because we don't know the OID (and in cases involving |
| 395 | * relfilenode swaps, it's not always clear which table OID to forget, |
| 396 | * anyway). |
| 397 | */ |
| 398 | |
| 399 | /* |
| 400 | * Send a shared-inval message to force other backends to close any |
| 401 | * dangling smgr references they may have for this rel. We should do this |
| 402 | * before starting the actual unlinking, in case we fail partway through |
| 403 | * that step. Note that the sinval message will eventually come back to |
| 404 | * this backend, too, and thereby provide a backstop that we closed our |
| 405 | * own smgr rel. |
| 406 | */ |
| 407 | CacheInvalidateSmgr(rnode); |
| 408 | |
| 409 | /* |
| 410 | * Delete the physical file(s). |
| 411 | * |
| 412 | * Note: smgr_unlink must treat deletion failure as a WARNING, not an |
| 413 | * ERROR, because we've already decided to commit or abort the current |
| 414 | * xact. |
| 415 | */ |
| 416 | smgrsw[which].smgr_unlink(rnode, InvalidForkNumber, isRedo); |
| 417 | } |
| 418 | |
| 419 | /* |
| 420 | * smgrdounlinkall() -- Immediately unlink all forks of all given relations |
| 421 | * |
| 422 | * All forks of all given relations are removed from the store. This |
| 423 | * should not be used during transactional operations, since it can't be |
| 424 | * undone. |
| 425 | * |
| 426 | * If isRedo is true, it is okay for the underlying file(s) to be gone |
| 427 | * already. |
| 428 | * |
| 429 | * This is equivalent to calling smgrdounlink for each relation, but it's |
| 430 | * significantly quicker so should be preferred when possible. |
| 431 | */ |
| 432 | void |
| 433 | smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) |
| 434 | { |
| 435 | int i = 0; |
| 436 | RelFileNodeBackend *rnodes; |
| 437 | ForkNumber forknum; |
| 438 | |
| 439 | if (nrels == 0) |
| 440 | return; |
| 441 | |
| 442 | /* |
| 443 | * create an array which contains all relations to be dropped, and close |
| 444 | * each relation's forks at the smgr level while at it |
| 445 | */ |
| 446 | rnodes = palloc(sizeof(RelFileNodeBackend) * nrels); |
| 447 | for (i = 0; i < nrels; i++) |
| 448 | { |
| 449 | RelFileNodeBackend rnode = rels[i]->smgr_rnode; |
| 450 | int which = rels[i]->smgr_which; |
| 451 | |
| 452 | rnodes[i] = rnode; |
| 453 | |
| 454 | /* Close the forks at smgr level */ |
| 455 | for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) |
| 456 | smgrsw[which].smgr_close(rels[i], forknum); |
| 457 | } |
| 458 | |
| 459 | /* |
| 460 | * Get rid of any remaining buffers for the relations. bufmgr will just |
| 461 | * drop them without bothering to write the contents. |
| 462 | */ |
| 463 | DropRelFileNodesAllBuffers(rnodes, nrels); |
| 464 | |
| 465 | /* |
| 466 | * It'd be nice to tell the stats collector to forget them immediately, |
| 467 | * too. But we can't because we don't know the OIDs. |
| 468 | */ |
| 469 | |
| 470 | /* |
| 471 | * Send a shared-inval message to force other backends to close any |
| 472 | * dangling smgr references they may have for these rels. We should do |
| 473 | * this before starting the actual unlinking, in case we fail partway |
| 474 | * through that step. Note that the sinval messages will eventually come |
| 475 | * back to this backend, too, and thereby provide a backstop that we |
| 476 | * closed our own smgr rel. |
| 477 | */ |
| 478 | for (i = 0; i < nrels; i++) |
| 479 | CacheInvalidateSmgr(rnodes[i]); |
| 480 | |
| 481 | /* |
| 482 | * Delete the physical file(s). |
| 483 | * |
| 484 | * Note: smgr_unlink must treat deletion failure as a WARNING, not an |
| 485 | * ERROR, because we've already decided to commit or abort the current |
| 486 | * xact. |
| 487 | */ |
| 488 | |
| 489 | for (i = 0; i < nrels; i++) |
| 490 | { |
| 491 | int which = rels[i]->smgr_which; |
| 492 | |
| 493 | for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) |
| 494 | smgrsw[which].smgr_unlink(rnodes[i], forknum, isRedo); |
| 495 | } |
| 496 | |
| 497 | pfree(rnodes); |
| 498 | } |
| 499 | |
| 500 | /* |
| 501 | * smgrdounlinkfork() -- Immediately unlink one fork of a relation. |
| 502 | * |
| 503 | * The specified fork of the relation is removed from the store. This |
| 504 | * should not be used during transactional operations, since it can't be |
| 505 | * undone. |
| 506 | * |
| 507 | * If isRedo is true, it is okay for the underlying file to be gone |
| 508 | * already. |
| 509 | */ |
| 510 | void |
| 511 | smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo) |
| 512 | { |
| 513 | RelFileNodeBackend rnode = reln->smgr_rnode; |
| 514 | int which = reln->smgr_which; |
| 515 | |
| 516 | /* Close the fork at smgr level */ |
| 517 | smgrsw[which].smgr_close(reln, forknum); |
| 518 | |
| 519 | /* |
| 520 | * Get rid of any remaining buffers for the fork. bufmgr will just drop |
| 521 | * them without bothering to write the contents. |
| 522 | */ |
| 523 | DropRelFileNodeBuffers(rnode, forknum, 0); |
| 524 | |
| 525 | /* |
| 526 | * It'd be nice to tell the stats collector to forget it immediately, too. |
| 527 | * But we can't because we don't know the OID (and in cases involving |
| 528 | * relfilenode swaps, it's not always clear which table OID to forget, |
| 529 | * anyway). |
| 530 | */ |
| 531 | |
| 532 | /* |
| 533 | * Send a shared-inval message to force other backends to close any |
| 534 | * dangling smgr references they may have for this rel. We should do this |
| 535 | * before starting the actual unlinking, in case we fail partway through |
| 536 | * that step. Note that the sinval message will eventually come back to |
| 537 | * this backend, too, and thereby provide a backstop that we closed our |
| 538 | * own smgr rel. |
| 539 | */ |
| 540 | CacheInvalidateSmgr(rnode); |
| 541 | |
| 542 | /* |
| 543 | * Delete the physical file(s). |
| 544 | * |
| 545 | * Note: smgr_unlink must treat deletion failure as a WARNING, not an |
| 546 | * ERROR, because we've already decided to commit or abort the current |
| 547 | * xact. |
| 548 | */ |
| 549 | smgrsw[which].smgr_unlink(rnode, forknum, isRedo); |
| 550 | } |
| 551 | |
| 552 | /* |
| 553 | * smgrextend() -- Add a new block to a file. |
| 554 | * |
| 555 | * The semantics are nearly the same as smgrwrite(): write at the |
| 556 | * specified position. However, this is to be used for the case of |
| 557 | * extending a relation (i.e., blocknum is at or beyond the current |
| 558 | * EOF). Note that we assume writing a block beyond current EOF |
| 559 | * causes intervening file space to become filled with zeroes. |
| 560 | */ |
| 561 | void |
| 562 | smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
| 563 | char *buffer, bool skipFsync) |
| 564 | { |
| 565 | smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum, |
| 566 | buffer, skipFsync); |
| 567 | } |
| 568 | |
| 569 | /* |
| 570 | * smgrprefetch() -- Initiate asynchronous read of the specified block of a relation. |
| 571 | */ |
| 572 | void |
| 573 | smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) |
| 574 | { |
| 575 | smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum); |
| 576 | } |
| 577 | |
| 578 | /* |
| 579 | * smgrread() -- read a particular block from a relation into the supplied |
| 580 | * buffer. |
| 581 | * |
| 582 | * This routine is called from the buffer manager in order to |
| 583 | * instantiate pages in the shared buffer cache. All storage managers |
| 584 | * return pages in the format that POSTGRES expects. |
| 585 | */ |
| 586 | void |
| 587 | smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
| 588 | char *buffer) |
| 589 | { |
| 590 | smgrsw[reln->smgr_which].smgr_read(reln, forknum, blocknum, buffer); |
| 591 | } |
| 592 | |
| 593 | /* |
| 594 | * smgrwrite() -- Write the supplied buffer out. |
| 595 | * |
| 596 | * This is to be used only for updating already-existing blocks of a |
| 597 | * relation (ie, those before the current EOF). To extend a relation, |
| 598 | * use smgrextend(). |
| 599 | * |
| 600 | * This is not a synchronous write -- the block is not necessarily |
| 601 | * on disk at return, only dumped out to the kernel. However, |
| 602 | * provisions will be made to fsync the write before the next checkpoint. |
| 603 | * |
| 604 | * skipFsync indicates that the caller will make other provisions to |
| 605 | * fsync the relation, so we needn't bother. Temporary relations also |
| 606 | * do not require fsync. |
| 607 | */ |
| 608 | void |
| 609 | smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
| 610 | char *buffer, bool skipFsync) |
| 611 | { |
| 612 | smgrsw[reln->smgr_which].smgr_write(reln, forknum, blocknum, |
| 613 | buffer, skipFsync); |
| 614 | } |
| 615 | |
| 616 | |
| 617 | /* |
| 618 | * smgrwriteback() -- Trigger kernel writeback for the supplied range of |
| 619 | * blocks. |
| 620 | */ |
| 621 | void |
| 622 | smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
| 623 | BlockNumber nblocks) |
| 624 | { |
| 625 | smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum, |
| 626 | nblocks); |
| 627 | } |
| 628 | |
| 629 | /* |
| 630 | * smgrnblocks() -- Calculate the number of blocks in the |
| 631 | * supplied relation. |
| 632 | */ |
| 633 | BlockNumber |
| 634 | smgrnblocks(SMgrRelation reln, ForkNumber forknum) |
| 635 | { |
| 636 | return smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum); |
| 637 | } |
| 638 | |
| 639 | /* |
| 640 | * smgrtruncate() -- Truncate supplied relation to the specified number |
| 641 | * of blocks |
| 642 | * |
| 643 | * The truncation is done immediately, so this can't be rolled back. |
| 644 | */ |
| 645 | void |
| 646 | smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) |
| 647 | { |
| 648 | /* |
| 649 | * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will |
| 650 | * just drop them without bothering to write the contents. |
| 651 | */ |
| 652 | DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nblocks); |
| 653 | |
| 654 | /* |
| 655 | * Send a shared-inval message to force other backends to close any smgr |
| 656 | * references they may have for this rel. This is useful because they |
| 657 | * might have open file pointers to segments that got removed, and/or |
| 658 | * smgr_targblock variables pointing past the new rel end. (The inval |
| 659 | * message will come back to our backend, too, causing a |
| 660 | * probably-unnecessary local smgr flush. But we don't expect that this |
| 661 | * is a performance-critical path.) As in the unlink code, we want to be |
| 662 | * sure the message is sent before we start changing things on-disk. |
| 663 | */ |
| 664 | CacheInvalidateSmgr(reln->smgr_rnode); |
| 665 | |
| 666 | /* |
| 667 | * Do the truncation. |
| 668 | */ |
| 669 | smgrsw[reln->smgr_which].smgr_truncate(reln, forknum, nblocks); |
| 670 | } |
| 671 | |
| 672 | /* |
| 673 | * smgrimmedsync() -- Force the specified relation to stable storage. |
| 674 | * |
| 675 | * Synchronously force all previous writes to the specified relation |
| 676 | * down to disk. |
| 677 | * |
| 678 | * This is useful for building completely new relations (eg, new |
| 679 | * indexes). Instead of incrementally WAL-logging the index build |
| 680 | * steps, we can just write completed index pages to disk with smgrwrite |
| 681 | * or smgrextend, and then fsync the completed index file before |
| 682 | * committing the transaction. (This is sufficient for purposes of |
| 683 | * crash recovery, since it effectively duplicates forcing a checkpoint |
| 684 | * for the completed index. But it is *not* sufficient if one wishes |
| 685 | * to use the WAL log for PITR or replication purposes: in that case |
| 686 | * we have to make WAL entries as well.) |
| 687 | * |
| 688 | * The preceding writes should specify skipFsync = true to avoid |
| 689 | * duplicative fsyncs. |
| 690 | * |
| 691 | * Note that you need to do FlushRelationBuffers() first if there is |
| 692 | * any possibility that there are dirty buffers for the relation; |
| 693 | * otherwise the sync is not very meaningful. |
| 694 | */ |
| 695 | void |
| 696 | smgrimmedsync(SMgrRelation reln, ForkNumber forknum) |
| 697 | { |
| 698 | smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum); |
| 699 | } |
| 700 | |
| 701 | /* |
| 702 | * AtEOXact_SMgr |
| 703 | * |
| 704 | * This routine is called during transaction commit or abort (it doesn't |
| 705 | * particularly care which). All transient SMgrRelation objects are closed. |
| 706 | * |
| 707 | * We do this as a compromise between wanting transient SMgrRelations to |
| 708 | * live awhile (to amortize the costs of blind writes of multiple blocks) |
| 709 | * and needing them to not live forever (since we're probably holding open |
| 710 | * a kernel file descriptor for the underlying file, and we need to ensure |
| 711 | * that gets closed reasonably soon if the file gets deleted). |
| 712 | */ |
| 713 | void |
| 714 | AtEOXact_SMgr(void) |
| 715 | { |
| 716 | dlist_mutable_iter iter; |
| 717 | |
| 718 | /* |
| 719 | * Zap all unowned SMgrRelations. We rely on smgrclose() to remove each |
| 720 | * one from the list. |
| 721 | */ |
| 722 | dlist_foreach_modify(iter, &unowned_relns) |
| 723 | { |
| 724 | SMgrRelation rel = dlist_container(SMgrRelationData, node, |
| 725 | iter.cur); |
| 726 | |
| 727 | Assert(rel->smgr_owner == NULL); |
| 728 | |
| 729 | smgrclose(rel); |
| 730 | } |
| 731 | } |
| 732 | |