/*-------------------------------------------------------------------------
 *
 * buf_init.c
 *	  buffer manager initialization routines
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/buffer/buf_init.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/buf_internals.h"


BufferDescPadded *BufferDescriptors;
char	   *BufferBlocks;
LWLockMinimallyPadded *BufferIOLWLockArray = NULL;
WritebackContext BackendWritebackContext;
CkptSortItem *CkptBufferIds;

/*
 * Data Structures:
 *		buffers live in a freelist and a lookup data structure.
 *
 *
 * Buffer Lookup:
 *		One important note: the buffer has to be available for
 *		lookup BEFORE an IO begins.  Otherwise a second process
 *		trying to read the buffer will allocate its own copy, and
 *		the buffer pool will become inconsistent.
 *
 * Buffer Replacement:
 *		see freelist.c.  A buffer cannot be replaced while it is
 *		in use, either by the data manager or during IO.
 *
 *
 * Synchronization/Locking:
 *
 * IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
 *		It must be set when an IO is initiated and cleared at
 *		the end of the IO.  It is there to make sure that one
 *		process doesn't start to use a buffer while another is
 *		faulting it in.  see WaitIO and related routines.
 *
 * refcount -- Counts the number of processes holding pins on a buffer.
 *		A buffer is pinned during IO and immediately after a BufferAlloc().
 *		Pins must be released before end of transaction.  For efficiency the
 *		shared refcount isn't increased if an individual backend pins a buffer
 *		multiple times.  Check the PrivateRefCount infrastructure in bufmgr.c.
 */
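
/*
 * Illustration only -- a sketch, not the real implementation: the shared
 * refcount lives in the low-order bits of each descriptor's atomic "state"
 * word, alongside flag bits, so taking a pin is in principle a single
 * atomic add.  The real PinBuffer() in bufmgr.c is more careful: it checks
 * the backend-local PrivateRefCount first and uses a CAS loop that respects
 * the BM_LOCKED bit.
 *
 *		BufferDesc *buf = GetBufferDescriptor(buf_id);
 *		uint32		old_state = pg_atomic_fetch_add_u32(&buf->state, 1);
 *
 *		Assert(BUF_STATE_GET_REFCOUNT(old_state) < MAX_BACKENDS);
 *
 * BUF_STATE_GET_REFCOUNT() extracts the refcount from the combined state
 * word; see buf_internals.h.
 */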


/*
 * Initialize shared buffer pool
 *
 * This is called once during shared-memory initialization (either in the
 * postmaster, or in a standalone backend).
 */
void
InitBufferPool(void)
{
	bool		foundBufs,
				foundDescs,
				foundIOLocks,
				foundBufCkpt;

	/* Align descriptors to a cacheline boundary. */
	BufferDescriptors = (BufferDescPadded *)
		ShmemInitStruct("Buffer Descriptors",
						NBuffers * sizeof(BufferDescPadded),
						&foundDescs);

	BufferBlocks = (char *)
		ShmemInitStruct("Buffer Blocks",
						NBuffers * (Size) BLCKSZ, &foundBufs);

	/* Align lwlocks to cacheline boundary */
	BufferIOLWLockArray = (LWLockMinimallyPadded *)
		ShmemInitStruct("Buffer IO Locks",
						NBuffers * (Size) sizeof(LWLockMinimallyPadded),
						&foundIOLocks);

	LWLockRegisterTranche(LWTRANCHE_BUFFER_IO_IN_PROGRESS, "buffer_io");
	LWLockRegisterTranche(LWTRANCHE_BUFFER_CONTENT, "buffer_content");

	/*
	 * The array used to sort to-be-checkpointed buffer ids is located in
	 * shared memory, to avoid having to allocate significant amounts of
	 * memory at runtime.  Such an allocation would happen in the middle of
	 * a checkpoint, or when the checkpointer is restarted, and a memory
	 * allocation failure at either point would be painful.
	 */
	CkptBufferIds = (CkptSortItem *)
		ShmemInitStruct("Checkpoint BufferIds",
						NBuffers * sizeof(CkptSortItem), &foundBufCkpt);
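
	/*
	 * For reference, BufferSync() in bufmgr.c fills this array at checkpoint
	 * time, one entry per dirty buffer, and then sorts it so that buffers
	 * are written out in (tablespace, relation, fork, block) order, roughly
	 * as follows (slightly simplified; see BufferSync() for the real code):
	 *
	 *		CkptSortItem *item = &CkptBufferIds[num_to_scan++];
	 *
	 *		item->buf_id = buf_id;
	 *		item->tsId = bufHdr->tag.rnode.spcNode;
	 *		item->relNode = bufHdr->tag.rnode.relNode;
	 *		item->forkNum = bufHdr->tag.forkNum;
	 *		item->blockNum = bufHdr->tag.blockNum;
	 */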

	if (foundDescs || foundBufs || foundIOLocks || foundBufCkpt)
	{
		/* should find all of these, or none of them */
		Assert(foundDescs && foundBufs && foundIOLocks && foundBufCkpt);
		/* note: this path is only taken in EXEC_BACKEND case */
	}
	else
	{
		int			i;

		/*
		 * Initialize all the buffer headers.
		 */
		for (i = 0; i < NBuffers; i++)
		{
			BufferDesc *buf = GetBufferDescriptor(i);

			CLEAR_BUFFERTAG(buf->tag);

			pg_atomic_init_u32(&buf->state, 0);
			buf->wait_backend_pid = 0;

			buf->buf_id = i;

			/*
			 * Initially link all the buffers together as unused. Subsequent
			 * management of this list is done by freelist.c.
			 */
			buf->freeNext = i + 1;

			LWLockInitialize(BufferDescriptorGetContentLock(buf),
							 LWTRANCHE_BUFFER_CONTENT);

			LWLockInitialize(BufferDescriptorGetIOLock(buf),
							 LWTRANCHE_BUFFER_IO_IN_PROGRESS);
		}

		/* Correct last entry of linked list */
		GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST;
	}
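
	/*
	 * Illustration only -- the real logic is StrategyGetBuffer() in
	 * freelist.c, which also takes buffer_strategy_lock and falls back to a
	 * clock sweep when the list is empty: consuming the free list built
	 * above amounts to popping the head and following the freeNext chain
	 * until FREENEXT_END_OF_LIST is reached.
	 *
	 *		BufferDesc *buf = GetBufferDescriptor(firstFreeBuffer);
	 *
	 *		firstFreeBuffer = buf->freeNext;
	 *		buf->freeNext = FREENEXT_NOT_IN_LIST;
	 *
	 * (firstFreeBuffer here stands in for the head pointer kept in
	 * freelist.c's private StrategyControl struct.)
	 */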

	/* Init other shared buffer-management stuff */
	StrategyInitialize(!foundDescs);

	/* Initialize per-backend file flush context */
	WritebackContextInit(&BackendWritebackContext,
						 &backend_flush_after);
}

/*
 * BufferShmemSize
 *
 * compute the size of shared memory for the buffer pool including
 * data pages, buffer descriptors, hash tables, etc.
 */
Size
BufferShmemSize(void)
{
	Size		size = 0;

	/* size of buffer descriptors */
	size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded)));
	/* to allow aligning buffer descriptors */
	size = add_size(size, PG_CACHE_LINE_SIZE);

	/* size of data pages */
	size = add_size(size, mul_size(NBuffers, BLCKSZ));

	/* size of stuff controlled by freelist.c */
	size = add_size(size, StrategyShmemSize());

	/*
	 * It would be nice to include the I/O locks in the BufferDesc, but that
	 * would increase the size of a BufferDesc to more than one cache line,
	 * and benchmarking has shown that keeping every BufferDesc aligned on a
	 * cache line boundary is important for performance.  So, instead, the
	 * array of I/O locks is allocated in a separate tranche.  Because those
	 * locks are not highly contended, we lay out the array with minimal
	 * padding (see the sizing note below).
	 */
	size = add_size(size, mul_size(NBuffers, sizeof(LWLockMinimallyPadded)));
	/* to allow aligning the above */
	size = add_size(size, PG_CACHE_LINE_SIZE);
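
	/*
	 * Sizing note (figures from lwlock.h in this version; worth re-checking
	 * if they change): LWLockMinimallyPadded rounds an LWLock up to
	 * LWLOCK_MINIMAL_SIZE (32 bytes), whereas the fully padded LWLockPadded
	 * used for hot locks rounds up to LWLOCK_PADDED_SIZE, i.e.
	 * PG_CACHE_LINE_SIZE (128 bytes).  The minimal layout therefore makes
	 * this array about a quarter of the size it would otherwise be.
	 */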

	/* size of checkpoint sort array in bufmgr.c */
	size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem)));

	return size;
}
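
/*
 * Usage sketch: BufferShmemSize() is one term in the total shared-memory
 * size computed at startup (see CreateSharedMemoryAndSemaphores() in
 * ipci.c), roughly:
 *
 *		size = add_size(size, BufferShmemSize());
 *		size = add_size(size, LockShmemSize());
 *		...
 *
 * The matching allocations are then carved out of that segment by the
 * ShmemInitStruct() calls in InitBufferPool() above.
 */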