| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * buf_init.c |
| 4 | * buffer manager initialization routines |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * Portions Copyright (c) 1994, Regents of the University of California |
| 8 | * |
| 9 | * |
| 10 | * IDENTIFICATION |
| 11 | * src/backend/storage/buffer/buf_init.c |
| 12 | * |
| 13 | *------------------------------------------------------------------------- |
| 14 | */ |
| 15 | #include "postgres.h" |
| 16 | |
| 17 | #include "storage/bufmgr.h" |
| 18 | #include "storage/buf_internals.h" |
| 19 | |
| 20 | |
/* Array of padded buffer descriptors, one per shared buffer (in shared memory). */
BufferDescPadded *BufferDescriptors;
/* The buffer data pages themselves: NBuffers blocks of BLCKSZ bytes each. */
char	   *BufferBlocks;
/* Per-buffer I/O LWLocks, kept outside BufferDesc so each descriptor stays
 * within one cache line; see the comment in BufferShmemSize(). */
LWLockMinimallyPadded *BufferIOLWLockArray = NULL;
/* Per-backend context used to issue writeback (flush) requests. */
WritebackContext BackendWritebackContext;
/* Shared array used to sort to-be-checkpointed buffer ids; allocated up
 * front so a checkpoint never needs a large runtime allocation. */
CkptSortItem *CkptBufferIds;
| 26 | |
| 27 | |
| 28 | /* |
| 29 | * Data Structures: |
| 30 | * buffers live in a freelist and a lookup data structure. |
| 31 | * |
| 32 | * |
 * Buffer Lookup:
 *		An important note: the buffer has to be
 *		available for lookup BEFORE an IO begins.  Otherwise
 *		a second process trying to read the buffer will
 *		allocate its own copy and the buffer pool will
 *		become inconsistent.
| 39 | * |
| 40 | * Buffer Replacement: |
| 41 | * see freelist.c. A buffer cannot be replaced while in |
| 42 | * use either by data manager or during IO. |
| 43 | * |
| 44 | * |
| 45 | * Synchronization/Locking: |
| 46 | * |
| 47 | * IO_IN_PROGRESS -- this is a flag in the buffer descriptor. |
| 48 | * It must be set when an IO is initiated and cleared at |
| 49 | * the end of the IO. It is there to make sure that one |
| 50 | * process doesn't start to use a buffer while another is |
| 51 | * faulting it in. see WaitIO and related routines. |
| 52 | * |
| 53 | * refcount -- Counts the number of processes holding pins on a buffer. |
| 54 | * A buffer is pinned during IO and immediately after a BufferAlloc(). |
| 55 | * Pins must be released before end of transaction. For efficiency the |
| 56 | * shared refcount isn't increased if an individual backend pins a buffer |
| 57 | * multiple times. Check the PrivateRefCount infrastructure in bufmgr.c. |
| 58 | */ |
| 59 | |
| 60 | |
| 61 | /* |
| 62 | * Initialize shared buffer pool |
| 63 | * |
| 64 | * This is called once during shared-memory initialization (either in the |
| 65 | * postmaster, or in a standalone backend). |
| 66 | */ |
| 67 | void |
| 68 | InitBufferPool(void) |
| 69 | { |
| 70 | bool foundBufs, |
| 71 | foundDescs, |
| 72 | foundIOLocks, |
| 73 | foundBufCkpt; |
| 74 | |
| 75 | /* Align descriptors to a cacheline boundary. */ |
| 76 | BufferDescriptors = (BufferDescPadded *) |
| 77 | ShmemInitStruct("Buffer Descriptors" , |
| 78 | NBuffers * sizeof(BufferDescPadded), |
| 79 | &foundDescs); |
| 80 | |
| 81 | BufferBlocks = (char *) |
| 82 | ShmemInitStruct("Buffer Blocks" , |
| 83 | NBuffers * (Size) BLCKSZ, &foundBufs); |
| 84 | |
| 85 | /* Align lwlocks to cacheline boundary */ |
| 86 | BufferIOLWLockArray = (LWLockMinimallyPadded *) |
| 87 | ShmemInitStruct("Buffer IO Locks" , |
| 88 | NBuffers * (Size) sizeof(LWLockMinimallyPadded), |
| 89 | &foundIOLocks); |
| 90 | |
| 91 | LWLockRegisterTranche(LWTRANCHE_BUFFER_IO_IN_PROGRESS, "buffer_io" ); |
| 92 | LWLockRegisterTranche(LWTRANCHE_BUFFER_CONTENT, "buffer_content" ); |
| 93 | |
| 94 | /* |
| 95 | * The array used to sort to-be-checkpointed buffer ids is located in |
| 96 | * shared memory, to avoid having to allocate significant amounts of |
| 97 | * memory at runtime. As that'd be in the middle of a checkpoint, or when |
| 98 | * the checkpointer is restarted, memory allocation failures would be |
| 99 | * painful. |
| 100 | */ |
| 101 | CkptBufferIds = (CkptSortItem *) |
| 102 | ShmemInitStruct("Checkpoint BufferIds" , |
| 103 | NBuffers * sizeof(CkptSortItem), &foundBufCkpt); |
| 104 | |
| 105 | if (foundDescs || foundBufs || foundIOLocks || foundBufCkpt) |
| 106 | { |
| 107 | /* should find all of these, or none of them */ |
| 108 | Assert(foundDescs && foundBufs && foundIOLocks && foundBufCkpt); |
| 109 | /* note: this path is only taken in EXEC_BACKEND case */ |
| 110 | } |
| 111 | else |
| 112 | { |
| 113 | int i; |
| 114 | |
| 115 | /* |
| 116 | * Initialize all the buffer headers. |
| 117 | */ |
| 118 | for (i = 0; i < NBuffers; i++) |
| 119 | { |
| 120 | BufferDesc *buf = GetBufferDescriptor(i); |
| 121 | |
| 122 | CLEAR_BUFFERTAG(buf->tag); |
| 123 | |
| 124 | pg_atomic_init_u32(&buf->state, 0); |
| 125 | buf->wait_backend_pid = 0; |
| 126 | |
| 127 | buf->buf_id = i; |
| 128 | |
| 129 | /* |
| 130 | * Initially link all the buffers together as unused. Subsequent |
| 131 | * management of this list is done by freelist.c. |
| 132 | */ |
| 133 | buf->freeNext = i + 1; |
| 134 | |
| 135 | LWLockInitialize(BufferDescriptorGetContentLock(buf), |
| 136 | LWTRANCHE_BUFFER_CONTENT); |
| 137 | |
| 138 | LWLockInitialize(BufferDescriptorGetIOLock(buf), |
| 139 | LWTRANCHE_BUFFER_IO_IN_PROGRESS); |
| 140 | } |
| 141 | |
| 142 | /* Correct last entry of linked list */ |
| 143 | GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST; |
| 144 | } |
| 145 | |
| 146 | /* Init other shared buffer-management stuff */ |
| 147 | StrategyInitialize(!foundDescs); |
| 148 | |
| 149 | /* Initialize per-backend file flush context */ |
| 150 | WritebackContextInit(&BackendWritebackContext, |
| 151 | &backend_flush_after); |
| 152 | } |
| 153 | |
| 154 | /* |
| 155 | * BufferShmemSize |
| 156 | * |
| 157 | * compute the size of shared memory for the buffer pool including |
| 158 | * data pages, buffer descriptors, hash tables, etc. |
| 159 | */ |
| 160 | Size |
| 161 | BufferShmemSize(void) |
| 162 | { |
| 163 | Size size = 0; |
| 164 | |
| 165 | /* size of buffer descriptors */ |
| 166 | size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded))); |
| 167 | /* to allow aligning buffer descriptors */ |
| 168 | size = add_size(size, PG_CACHE_LINE_SIZE); |
| 169 | |
| 170 | /* size of data pages */ |
| 171 | size = add_size(size, mul_size(NBuffers, BLCKSZ)); |
| 172 | |
| 173 | /* size of stuff controlled by freelist.c */ |
| 174 | size = add_size(size, StrategyShmemSize()); |
| 175 | |
| 176 | /* |
| 177 | * It would be nice to include the I/O locks in the BufferDesc, but that |
| 178 | * would increase the size of a BufferDesc to more than one cache line, |
| 179 | * and benchmarking has shown that keeping every BufferDesc aligned on a |
| 180 | * cache line boundary is important for performance. So, instead, the |
| 181 | * array of I/O locks is allocated in a separate tranche. Because those |
| 182 | * locks are not highly contended, we lay out the array with minimal |
| 183 | * padding. |
| 184 | */ |
| 185 | size = add_size(size, mul_size(NBuffers, sizeof(LWLockMinimallyPadded))); |
| 186 | /* to allow aligning the above */ |
| 187 | size = add_size(size, PG_CACHE_LINE_SIZE); |
| 188 | |
| 189 | /* size of checkpoint sort array in bufmgr.c */ |
| 190 | size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem))); |
| 191 | |
| 192 | return size; |
| 193 | } |
| 194 | |