1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * smgr.c |
4 | * public interface routines to storage manager switch. |
5 | * |
6 | * All file system operations in POSTGRES dispatch through these |
7 | * routines. |
8 | * |
9 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
10 | * Portions Copyright (c) 1994, Regents of the University of California |
11 | * |
12 | * |
13 | * IDENTIFICATION |
14 | * src/backend/storage/smgr/smgr.c |
15 | * |
16 | *------------------------------------------------------------------------- |
17 | */ |
18 | #include "postgres.h" |
19 | |
20 | #include "commands/tablespace.h" |
21 | #include "lib/ilist.h" |
22 | #include "storage/bufmgr.h" |
23 | #include "storage/ipc.h" |
24 | #include "storage/md.h" |
25 | #include "storage/smgr.h" |
26 | #include "utils/hsearch.h" |
27 | #include "utils/inval.h" |
28 | |
29 | |
30 | /* |
31 | * This struct of function pointers defines the API between smgr.c and |
32 | * any individual storage manager module. Note that smgr subfunctions are |
33 | * generally expected to report problems via elog(ERROR). An exception is |
34 | * that smgr_unlink should use elog(WARNING), rather than erroring out, |
35 | * because we normally unlink relations during post-commit/abort cleanup, |
36 | * and so it's too late to raise an error. Also, various conditions that |
37 | * would normally be errors should be allowed during bootstrap and/or WAL |
38 | * recovery --- see comments in md.c for details. |
39 | */ |
40 | typedef struct f_smgr |
41 | { |
42 | void (*smgr_init) (void); /* may be NULL */ |
43 | void (*smgr_shutdown) (void); /* may be NULL */ |
44 | void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); |
45 | void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, |
46 | bool isRedo); |
47 | bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); |
48 | void (*smgr_unlink) (RelFileNodeBackend rnode, ForkNumber forknum, |
49 | bool isRedo); |
50 | void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, |
51 | BlockNumber blocknum, char *buffer, bool skipFsync); |
52 | void (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, |
53 | BlockNumber blocknum); |
54 | void (*smgr_read) (SMgrRelation reln, ForkNumber forknum, |
55 | BlockNumber blocknum, char *buffer); |
56 | void (*smgr_write) (SMgrRelation reln, ForkNumber forknum, |
57 | BlockNumber blocknum, char *buffer, bool skipFsync); |
58 | void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, |
59 | BlockNumber blocknum, BlockNumber nblocks); |
60 | BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); |
61 | void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, |
62 | BlockNumber nblocks); |
63 | void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); |
64 | } f_smgr; |
65 | |
66 | static const f_smgr smgrsw[] = { |
67 | /* magnetic disk */ |
68 | { |
69 | .smgr_init = mdinit, |
70 | .smgr_shutdown = NULL, |
71 | .smgr_close = mdclose, |
72 | .smgr_create = mdcreate, |
73 | .smgr_exists = mdexists, |
74 | .smgr_unlink = mdunlink, |
75 | .smgr_extend = mdextend, |
76 | .smgr_prefetch = mdprefetch, |
77 | .smgr_read = mdread, |
78 | .smgr_write = mdwrite, |
79 | .smgr_writeback = mdwriteback, |
80 | .smgr_nblocks = mdnblocks, |
81 | .smgr_truncate = mdtruncate, |
82 | .smgr_immedsync = mdimmedsync, |
83 | } |
84 | }; |
85 | |
86 | static const int NSmgr = lengthof(smgrsw); |
87 | |
88 | /* |
89 | * Each backend has a hashtable that stores all extant SMgrRelation objects. |
90 | * In addition, "unowned" SMgrRelation objects are chained together in a list. |
91 | */ |
92 | static HTAB *SMgrRelationHash = NULL; |
93 | |
94 | static dlist_head unowned_relns; |
95 | |
96 | /* local function prototypes */ |
97 | static void smgrshutdown(int code, Datum arg); |
98 | |
99 | |
100 | /* |
101 | * smgrinit(), smgrshutdown() -- Initialize or shut down storage |
102 | * managers. |
103 | * |
104 | * Note: smgrinit is called during backend startup (normal or standalone |
105 | * case), *not* during postmaster start. Therefore, any resources created |
106 | * here or destroyed in smgrshutdown are backend-local. |
107 | */ |
108 | void |
109 | smgrinit(void) |
110 | { |
111 | int i; |
112 | |
113 | for (i = 0; i < NSmgr; i++) |
114 | { |
115 | if (smgrsw[i].smgr_init) |
116 | smgrsw[i].smgr_init(); |
117 | } |
118 | |
119 | /* register the shutdown proc */ |
120 | on_proc_exit(smgrshutdown, 0); |
121 | } |
122 | |
123 | /* |
124 | * on_proc_exit hook for smgr cleanup during backend shutdown |
125 | */ |
126 | static void |
127 | smgrshutdown(int code, Datum arg) |
128 | { |
129 | int i; |
130 | |
131 | for (i = 0; i < NSmgr; i++) |
132 | { |
133 | if (smgrsw[i].smgr_shutdown) |
134 | smgrsw[i].smgr_shutdown(); |
135 | } |
136 | } |
137 | |
138 | /* |
139 | * smgropen() -- Return an SMgrRelation object, creating it if need be. |
140 | * |
141 | * This does not attempt to actually open the underlying file. |
142 | */ |
143 | SMgrRelation |
144 | smgropen(RelFileNode rnode, BackendId backend) |
145 | { |
146 | RelFileNodeBackend brnode; |
147 | SMgrRelation reln; |
148 | bool found; |
149 | |
150 | if (SMgrRelationHash == NULL) |
151 | { |
152 | /* First time through: initialize the hash table */ |
153 | HASHCTL ctl; |
154 | |
155 | MemSet(&ctl, 0, sizeof(ctl)); |
156 | ctl.keysize = sizeof(RelFileNodeBackend); |
157 | ctl.entrysize = sizeof(SMgrRelationData); |
158 | SMgrRelationHash = hash_create("smgr relation table" , 400, |
159 | &ctl, HASH_ELEM | HASH_BLOBS); |
160 | dlist_init(&unowned_relns); |
161 | } |
162 | |
163 | /* Look up or create an entry */ |
164 | brnode.node = rnode; |
165 | brnode.backend = backend; |
166 | reln = (SMgrRelation) hash_search(SMgrRelationHash, |
167 | (void *) &brnode, |
168 | HASH_ENTER, &found); |
169 | |
170 | /* Initialize it if not present before */ |
171 | if (!found) |
172 | { |
173 | int forknum; |
174 | |
175 | /* hash_search already filled in the lookup key */ |
176 | reln->smgr_owner = NULL; |
177 | reln->smgr_targblock = InvalidBlockNumber; |
178 | reln->smgr_fsm_nblocks = InvalidBlockNumber; |
179 | reln->smgr_vm_nblocks = InvalidBlockNumber; |
180 | reln->smgr_which = 0; /* we only have md.c at present */ |
181 | |
182 | /* mark it not open */ |
183 | for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) |
184 | reln->md_num_open_segs[forknum] = 0; |
185 | |
186 | /* it has no owner yet */ |
187 | dlist_push_tail(&unowned_relns, &reln->node); |
188 | } |
189 | |
190 | return reln; |
191 | } |
192 | |
193 | /* |
194 | * smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object |
195 | * |
196 | * There can be only one owner at a time; this is sufficient since currently |
197 | * the only such owners exist in the relcache. |
198 | */ |
199 | void |
200 | smgrsetowner(SMgrRelation *owner, SMgrRelation reln) |
201 | { |
202 | /* We don't support "disowning" an SMgrRelation here, use smgrclearowner */ |
203 | Assert(owner != NULL); |
204 | |
205 | /* |
206 | * First, unhook any old owner. (Normally there shouldn't be any, but it |
207 | * seems possible that this can happen during swap_relation_files() |
208 | * depending on the order of processing. It's ok to close the old |
209 | * relcache entry early in that case.) |
210 | * |
211 | * If there isn't an old owner, then the reln should be in the unowned |
212 | * list, and we need to remove it. |
213 | */ |
214 | if (reln->smgr_owner) |
215 | *(reln->smgr_owner) = NULL; |
216 | else |
217 | dlist_delete(&reln->node); |
218 | |
219 | /* Now establish the ownership relationship. */ |
220 | reln->smgr_owner = owner; |
221 | *owner = reln; |
222 | } |
223 | |
224 | /* |
225 | * smgrclearowner() -- Remove long-lived reference to an SMgrRelation object |
226 | * if one exists |
227 | */ |
228 | void |
229 | smgrclearowner(SMgrRelation *owner, SMgrRelation reln) |
230 | { |
231 | /* Do nothing if the SMgrRelation object is not owned by the owner */ |
232 | if (reln->smgr_owner != owner) |
233 | return; |
234 | |
235 | /* unset the owner's reference */ |
236 | *owner = NULL; |
237 | |
238 | /* unset our reference to the owner */ |
239 | reln->smgr_owner = NULL; |
240 | |
241 | /* add to list of unowned relations */ |
242 | dlist_push_tail(&unowned_relns, &reln->node); |
243 | } |
244 | |
245 | /* |
246 | * smgrexists() -- Does the underlying file for a fork exist? |
247 | */ |
248 | bool |
249 | smgrexists(SMgrRelation reln, ForkNumber forknum) |
250 | { |
251 | return smgrsw[reln->smgr_which].smgr_exists(reln, forknum); |
252 | } |
253 | |
254 | /* |
255 | * smgrclose() -- Close and delete an SMgrRelation object. |
256 | */ |
257 | void |
258 | smgrclose(SMgrRelation reln) |
259 | { |
260 | SMgrRelation *owner; |
261 | ForkNumber forknum; |
262 | |
263 | for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) |
264 | smgrsw[reln->smgr_which].smgr_close(reln, forknum); |
265 | |
266 | owner = reln->smgr_owner; |
267 | |
268 | if (!owner) |
269 | dlist_delete(&reln->node); |
270 | |
271 | if (hash_search(SMgrRelationHash, |
272 | (void *) &(reln->smgr_rnode), |
273 | HASH_REMOVE, NULL) == NULL) |
274 | elog(ERROR, "SMgrRelation hashtable corrupted" ); |
275 | |
276 | /* |
277 | * Unhook the owner pointer, if any. We do this last since in the remote |
278 | * possibility of failure above, the SMgrRelation object will still exist. |
279 | */ |
280 | if (owner) |
281 | *owner = NULL; |
282 | } |
283 | |
284 | /* |
285 | * smgrcloseall() -- Close all existing SMgrRelation objects. |
286 | */ |
287 | void |
288 | smgrcloseall(void) |
289 | { |
290 | HASH_SEQ_STATUS status; |
291 | SMgrRelation reln; |
292 | |
293 | /* Nothing to do if hashtable not set up */ |
294 | if (SMgrRelationHash == NULL) |
295 | return; |
296 | |
297 | hash_seq_init(&status, SMgrRelationHash); |
298 | |
299 | while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL) |
300 | smgrclose(reln); |
301 | } |
302 | |
303 | /* |
304 | * smgrclosenode() -- Close SMgrRelation object for given RelFileNode, |
305 | * if one exists. |
306 | * |
307 | * This has the same effects as smgrclose(smgropen(rnode)), but it avoids |
308 | * uselessly creating a hashtable entry only to drop it again when no |
309 | * such entry exists already. |
310 | */ |
311 | void |
312 | smgrclosenode(RelFileNodeBackend rnode) |
313 | { |
314 | SMgrRelation reln; |
315 | |
316 | /* Nothing to do if hashtable not set up */ |
317 | if (SMgrRelationHash == NULL) |
318 | return; |
319 | |
320 | reln = (SMgrRelation) hash_search(SMgrRelationHash, |
321 | (void *) &rnode, |
322 | HASH_FIND, NULL); |
323 | if (reln != NULL) |
324 | smgrclose(reln); |
325 | } |
326 | |
327 | /* |
328 | * smgrcreate() -- Create a new relation. |
329 | * |
330 | * Given an already-created (but presumably unused) SMgrRelation, |
331 | * cause the underlying disk file or other storage for the fork |
332 | * to be created. |
333 | * |
334 | * If isRedo is true, it is okay for the underlying file to exist |
335 | * already because we are in a WAL replay sequence. |
336 | */ |
337 | void |
338 | smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) |
339 | { |
340 | /* |
341 | * Exit quickly in WAL replay mode if we've already opened the file. If |
342 | * it's open, it surely must exist. |
343 | */ |
344 | if (isRedo && reln->md_num_open_segs[forknum] > 0) |
345 | return; |
346 | |
347 | /* |
348 | * We may be using the target table space for the first time in this |
349 | * database, so create a per-database subdirectory if needed. |
350 | * |
351 | * XXX this is a fairly ugly violation of module layering, but this seems |
352 | * to be the best place to put the check. Maybe TablespaceCreateDbspace |
353 | * should be here and not in commands/tablespace.c? But that would imply |
354 | * importing a lot of stuff that smgr.c oughtn't know, either. |
355 | */ |
356 | TablespaceCreateDbspace(reln->smgr_rnode.node.spcNode, |
357 | reln->smgr_rnode.node.dbNode, |
358 | isRedo); |
359 | |
360 | smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo); |
361 | } |
362 | |
363 | /* |
364 | * smgrdounlink() -- Immediately unlink all forks of a relation. |
365 | * |
366 | * All forks of the relation are removed from the store. This should |
367 | * not be used during transactional operations, since it can't be undone. |
368 | * |
369 | * If isRedo is true, it is okay for the underlying file(s) to be gone |
370 | * already. |
371 | * |
372 | * This is equivalent to calling smgrdounlinkfork for each fork, but |
373 | * it's significantly quicker so should be preferred when possible. |
374 | */ |
375 | void |
376 | smgrdounlink(SMgrRelation reln, bool isRedo) |
377 | { |
378 | RelFileNodeBackend rnode = reln->smgr_rnode; |
379 | int which = reln->smgr_which; |
380 | ForkNumber forknum; |
381 | |
382 | /* Close the forks at smgr level */ |
383 | for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) |
384 | smgrsw[which].smgr_close(reln, forknum); |
385 | |
386 | /* |
387 | * Get rid of any remaining buffers for the relation. bufmgr will just |
388 | * drop them without bothering to write the contents. |
389 | */ |
390 | DropRelFileNodesAllBuffers(&rnode, 1); |
391 | |
392 | /* |
393 | * It'd be nice to tell the stats collector to forget it immediately, too. |
394 | * But we can't because we don't know the OID (and in cases involving |
395 | * relfilenode swaps, it's not always clear which table OID to forget, |
396 | * anyway). |
397 | */ |
398 | |
399 | /* |
400 | * Send a shared-inval message to force other backends to close any |
401 | * dangling smgr references they may have for this rel. We should do this |
402 | * before starting the actual unlinking, in case we fail partway through |
403 | * that step. Note that the sinval message will eventually come back to |
404 | * this backend, too, and thereby provide a backstop that we closed our |
405 | * own smgr rel. |
406 | */ |
407 | CacheInvalidateSmgr(rnode); |
408 | |
409 | /* |
410 | * Delete the physical file(s). |
411 | * |
412 | * Note: smgr_unlink must treat deletion failure as a WARNING, not an |
413 | * ERROR, because we've already decided to commit or abort the current |
414 | * xact. |
415 | */ |
416 | smgrsw[which].smgr_unlink(rnode, InvalidForkNumber, isRedo); |
417 | } |
418 | |
419 | /* |
420 | * smgrdounlinkall() -- Immediately unlink all forks of all given relations |
421 | * |
422 | * All forks of all given relations are removed from the store. This |
423 | * should not be used during transactional operations, since it can't be |
424 | * undone. |
425 | * |
426 | * If isRedo is true, it is okay for the underlying file(s) to be gone |
427 | * already. |
428 | * |
429 | * This is equivalent to calling smgrdounlink for each relation, but it's |
430 | * significantly quicker so should be preferred when possible. |
431 | */ |
432 | void |
433 | smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) |
434 | { |
435 | int i = 0; |
436 | RelFileNodeBackend *rnodes; |
437 | ForkNumber forknum; |
438 | |
439 | if (nrels == 0) |
440 | return; |
441 | |
442 | /* |
443 | * create an array which contains all relations to be dropped, and close |
444 | * each relation's forks at the smgr level while at it |
445 | */ |
446 | rnodes = palloc(sizeof(RelFileNodeBackend) * nrels); |
447 | for (i = 0; i < nrels; i++) |
448 | { |
449 | RelFileNodeBackend rnode = rels[i]->smgr_rnode; |
450 | int which = rels[i]->smgr_which; |
451 | |
452 | rnodes[i] = rnode; |
453 | |
454 | /* Close the forks at smgr level */ |
455 | for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) |
456 | smgrsw[which].smgr_close(rels[i], forknum); |
457 | } |
458 | |
459 | /* |
460 | * Get rid of any remaining buffers for the relations. bufmgr will just |
461 | * drop them without bothering to write the contents. |
462 | */ |
463 | DropRelFileNodesAllBuffers(rnodes, nrels); |
464 | |
465 | /* |
466 | * It'd be nice to tell the stats collector to forget them immediately, |
467 | * too. But we can't because we don't know the OIDs. |
468 | */ |
469 | |
470 | /* |
471 | * Send a shared-inval message to force other backends to close any |
472 | * dangling smgr references they may have for these rels. We should do |
473 | * this before starting the actual unlinking, in case we fail partway |
474 | * through that step. Note that the sinval messages will eventually come |
475 | * back to this backend, too, and thereby provide a backstop that we |
476 | * closed our own smgr rel. |
477 | */ |
478 | for (i = 0; i < nrels; i++) |
479 | CacheInvalidateSmgr(rnodes[i]); |
480 | |
481 | /* |
482 | * Delete the physical file(s). |
483 | * |
484 | * Note: smgr_unlink must treat deletion failure as a WARNING, not an |
485 | * ERROR, because we've already decided to commit or abort the current |
486 | * xact. |
487 | */ |
488 | |
489 | for (i = 0; i < nrels; i++) |
490 | { |
491 | int which = rels[i]->smgr_which; |
492 | |
493 | for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) |
494 | smgrsw[which].smgr_unlink(rnodes[i], forknum, isRedo); |
495 | } |
496 | |
497 | pfree(rnodes); |
498 | } |
499 | |
500 | /* |
501 | * smgrdounlinkfork() -- Immediately unlink one fork of a relation. |
502 | * |
503 | * The specified fork of the relation is removed from the store. This |
504 | * should not be used during transactional operations, since it can't be |
505 | * undone. |
506 | * |
507 | * If isRedo is true, it is okay for the underlying file to be gone |
508 | * already. |
509 | */ |
510 | void |
511 | smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo) |
512 | { |
513 | RelFileNodeBackend rnode = reln->smgr_rnode; |
514 | int which = reln->smgr_which; |
515 | |
516 | /* Close the fork at smgr level */ |
517 | smgrsw[which].smgr_close(reln, forknum); |
518 | |
519 | /* |
520 | * Get rid of any remaining buffers for the fork. bufmgr will just drop |
521 | * them without bothering to write the contents. |
522 | */ |
523 | DropRelFileNodeBuffers(rnode, forknum, 0); |
524 | |
525 | /* |
526 | * It'd be nice to tell the stats collector to forget it immediately, too. |
527 | * But we can't because we don't know the OID (and in cases involving |
528 | * relfilenode swaps, it's not always clear which table OID to forget, |
529 | * anyway). |
530 | */ |
531 | |
532 | /* |
533 | * Send a shared-inval message to force other backends to close any |
534 | * dangling smgr references they may have for this rel. We should do this |
535 | * before starting the actual unlinking, in case we fail partway through |
536 | * that step. Note that the sinval message will eventually come back to |
537 | * this backend, too, and thereby provide a backstop that we closed our |
538 | * own smgr rel. |
539 | */ |
540 | CacheInvalidateSmgr(rnode); |
541 | |
542 | /* |
543 | * Delete the physical file(s). |
544 | * |
545 | * Note: smgr_unlink must treat deletion failure as a WARNING, not an |
546 | * ERROR, because we've already decided to commit or abort the current |
547 | * xact. |
548 | */ |
549 | smgrsw[which].smgr_unlink(rnode, forknum, isRedo); |
550 | } |
551 | |
552 | /* |
553 | * smgrextend() -- Add a new block to a file. |
554 | * |
555 | * The semantics are nearly the same as smgrwrite(): write at the |
556 | * specified position. However, this is to be used for the case of |
557 | * extending a relation (i.e., blocknum is at or beyond the current |
558 | * EOF). Note that we assume writing a block beyond current EOF |
559 | * causes intervening file space to become filled with zeroes. |
560 | */ |
561 | void |
562 | smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
563 | char *buffer, bool skipFsync) |
564 | { |
565 | smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum, |
566 | buffer, skipFsync); |
567 | } |
568 | |
569 | /* |
570 | * smgrprefetch() -- Initiate asynchronous read of the specified block of a relation. |
571 | */ |
572 | void |
573 | smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) |
574 | { |
575 | smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum); |
576 | } |
577 | |
578 | /* |
579 | * smgrread() -- read a particular block from a relation into the supplied |
580 | * buffer. |
581 | * |
582 | * This routine is called from the buffer manager in order to |
583 | * instantiate pages in the shared buffer cache. All storage managers |
584 | * return pages in the format that POSTGRES expects. |
585 | */ |
586 | void |
587 | smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
588 | char *buffer) |
589 | { |
590 | smgrsw[reln->smgr_which].smgr_read(reln, forknum, blocknum, buffer); |
591 | } |
592 | |
593 | /* |
594 | * smgrwrite() -- Write the supplied buffer out. |
595 | * |
596 | * This is to be used only for updating already-existing blocks of a |
597 | * relation (ie, those before the current EOF). To extend a relation, |
598 | * use smgrextend(). |
599 | * |
600 | * This is not a synchronous write -- the block is not necessarily |
601 | * on disk at return, only dumped out to the kernel. However, |
602 | * provisions will be made to fsync the write before the next checkpoint. |
603 | * |
604 | * skipFsync indicates that the caller will make other provisions to |
605 | * fsync the relation, so we needn't bother. Temporary relations also |
606 | * do not require fsync. |
607 | */ |
608 | void |
609 | smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
610 | char *buffer, bool skipFsync) |
611 | { |
612 | smgrsw[reln->smgr_which].smgr_write(reln, forknum, blocknum, |
613 | buffer, skipFsync); |
614 | } |
615 | |
616 | |
617 | /* |
618 | * smgrwriteback() -- Trigger kernel writeback for the supplied range of |
619 | * blocks. |
620 | */ |
621 | void |
622 | smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, |
623 | BlockNumber nblocks) |
624 | { |
625 | smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum, |
626 | nblocks); |
627 | } |
628 | |
629 | /* |
630 | * smgrnblocks() -- Calculate the number of blocks in the |
631 | * supplied relation. |
632 | */ |
633 | BlockNumber |
634 | smgrnblocks(SMgrRelation reln, ForkNumber forknum) |
635 | { |
636 | return smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum); |
637 | } |
638 | |
639 | /* |
640 | * smgrtruncate() -- Truncate supplied relation to the specified number |
641 | * of blocks |
642 | * |
643 | * The truncation is done immediately, so this can't be rolled back. |
644 | */ |
645 | void |
646 | smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) |
647 | { |
648 | /* |
649 | * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will |
650 | * just drop them without bothering to write the contents. |
651 | */ |
652 | DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nblocks); |
653 | |
654 | /* |
655 | * Send a shared-inval message to force other backends to close any smgr |
656 | * references they may have for this rel. This is useful because they |
657 | * might have open file pointers to segments that got removed, and/or |
658 | * smgr_targblock variables pointing past the new rel end. (The inval |
659 | * message will come back to our backend, too, causing a |
660 | * probably-unnecessary local smgr flush. But we don't expect that this |
661 | * is a performance-critical path.) As in the unlink code, we want to be |
662 | * sure the message is sent before we start changing things on-disk. |
663 | */ |
664 | CacheInvalidateSmgr(reln->smgr_rnode); |
665 | |
666 | /* |
667 | * Do the truncation. |
668 | */ |
669 | smgrsw[reln->smgr_which].smgr_truncate(reln, forknum, nblocks); |
670 | } |
671 | |
672 | /* |
673 | * smgrimmedsync() -- Force the specified relation to stable storage. |
674 | * |
675 | * Synchronously force all previous writes to the specified relation |
676 | * down to disk. |
677 | * |
678 | * This is useful for building completely new relations (eg, new |
679 | * indexes). Instead of incrementally WAL-logging the index build |
680 | * steps, we can just write completed index pages to disk with smgrwrite |
681 | * or smgrextend, and then fsync the completed index file before |
682 | * committing the transaction. (This is sufficient for purposes of |
683 | * crash recovery, since it effectively duplicates forcing a checkpoint |
684 | * for the completed index. But it is *not* sufficient if one wishes |
685 | * to use the WAL log for PITR or replication purposes: in that case |
686 | * we have to make WAL entries as well.) |
687 | * |
688 | * The preceding writes should specify skipFsync = true to avoid |
689 | * duplicative fsyncs. |
690 | * |
691 | * Note that you need to do FlushRelationBuffers() first if there is |
692 | * any possibility that there are dirty buffers for the relation; |
693 | * otherwise the sync is not very meaningful. |
694 | */ |
695 | void |
696 | smgrimmedsync(SMgrRelation reln, ForkNumber forknum) |
697 | { |
698 | smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum); |
699 | } |
700 | |
701 | /* |
702 | * AtEOXact_SMgr |
703 | * |
704 | * This routine is called during transaction commit or abort (it doesn't |
705 | * particularly care which). All transient SMgrRelation objects are closed. |
706 | * |
707 | * We do this as a compromise between wanting transient SMgrRelations to |
708 | * live awhile (to amortize the costs of blind writes of multiple blocks) |
709 | * and needing them to not live forever (since we're probably holding open |
710 | * a kernel file descriptor for the underlying file, and we need to ensure |
711 | * that gets closed reasonably soon if the file gets deleted). |
712 | */ |
713 | void |
714 | AtEOXact_SMgr(void) |
715 | { |
716 | dlist_mutable_iter iter; |
717 | |
718 | /* |
719 | * Zap all unowned SMgrRelations. We rely on smgrclose() to remove each |
720 | * one from the list. |
721 | */ |
722 | dlist_foreach_modify(iter, &unowned_relns) |
723 | { |
724 | SMgrRelation rel = dlist_container(SMgrRelationData, node, |
725 | iter.cur); |
726 | |
727 | Assert(rel->smgr_owner == NULL); |
728 | |
729 | smgrclose(rel); |
730 | } |
731 | } |
732 | |