/*-------------------------------------------------------------------------
 *
 * buf_init.c
 *	  buffer manager initialization routines
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/buffer/buf_init.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "storage/buf_internals.h"
#include "storage/bufmgr.h"


BufferDescPadded *BufferDescriptors;
char	   *BufferBlocks;
LWLockMinimallyPadded *BufferIOLWLockArray = NULL;
WritebackContext BackendWritebackContext;
CkptSortItem *CkptBufferIds;

/*
 * Data Structures:
 *		buffers live in a freelist and a lookup data structure.
 *
 *
 * Buffer Lookup:
 *		The buffer has to be available for lookup BEFORE an IO begins.
 *		Otherwise a second process trying to read the buffer will
 *		allocate its own copy and the buffer pool will become
 *		inconsistent.
 *
 * Buffer Replacement:
 *		see freelist.c.  A buffer cannot be replaced while it is in
 *		use, either pinned by a backend or undergoing IO.
 *
 *
 * Synchronization/Locking:
 *
 * IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
 *		It must be set when an IO is initiated and cleared at
 *		the end of the IO.  It is there to make sure that one
 *		process doesn't start to use a buffer while another is
 *		faulting it in.  See WaitIO and related routines.
 *
 * refcount -- Counts the number of processes holding pins on a buffer.
 *		A buffer is pinned during IO and immediately after a BufferAlloc().
 *		Pins must be released before end of transaction.  For efficiency the
 *		shared refcount isn't increased if an individual backend pins a buffer
 *		multiple times.  Check the PrivateRefCount infrastructure in bufmgr.c.
 */
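
/*
 * As an illustration of the pin/IO discipline described above, a backend
 * that wants to read a page goes through bufmgr.c's public API rather than
 * touching these structures directly.  A minimal sketch, assuming a valid
 * Relation "rel" and BlockNumber "blkno" are already in hand (names here
 * are illustrative only; error handling omitted):
 *
 *		Buffer		buf;
 *
 *		buf = ReadBuffer(rel, blkno);			// pins the buffer; waits on
 *												// IO_IN_PROGRESS if another
 *												// backend is faulting it in
 *		LockBuffer(buf, BUFFER_LOCK_SHARE);		// content lock for reading
 *		// ... inspect the page via BufferGetPage(buf) ...
 *		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 *		ReleaseBuffer(buf);						// drops the pin (refcount)
 */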

/*
 * Initialize shared buffer pool
 *
 * This is called once during shared-memory initialization (either in the
 * postmaster, or in a standalone backend).
 */
void
InitBufferPool(void)
{
	bool		foundBufs,
				foundDescs,
				foundIOLocks,
				foundBufCkpt;

	/* Align descriptors to a cacheline boundary. */
	BufferDescriptors = (BufferDescPadded *)
		ShmemInitStruct("Buffer Descriptors",
						NBuffers * sizeof(BufferDescPadded),
						&foundDescs);

	BufferBlocks = (char *)
		ShmemInitStruct("Buffer Blocks",
						NBuffers * (Size) BLCKSZ, &foundBufs);

	/* Align lwlocks to cacheline boundary */
	BufferIOLWLockArray = (LWLockMinimallyPadded *)
		ShmemInitStruct("Buffer IO Locks",
						NBuffers * (Size) sizeof(LWLockMinimallyPadded),
						&foundIOLocks);

	LWLockRegisterTranche(LWTRANCHE_BUFFER_IO_IN_PROGRESS, "buffer_io");
	LWLockRegisterTranche(LWTRANCHE_BUFFER_CONTENT, "buffer_content");

	/*
	 * The array used to sort to-be-checkpointed buffer ids is located in
	 * shared memory, to avoid having to allocate significant amounts of
	 * memory at runtime.  As that'd be in the middle of a checkpoint, or when
	 * the checkpointer is restarted, memory allocation failures would be
	 * painful.
	 */
	CkptBufferIds = (CkptSortItem *)
		ShmemInitStruct("Checkpoint BufferIds",
						NBuffers * sizeof(CkptSortItem), &foundBufCkpt);

	if (foundDescs || foundBufs || foundIOLocks || foundBufCkpt)
	{
		/* should find all of these, or none of them */
		Assert(foundDescs && foundBufs && foundIOLocks && foundBufCkpt);
		/* note: this path is only taken in EXEC_BACKEND case */
	}
	else
	{
		int			i;

		/*
		 * Initialize all the buffer headers.
		 */
		for (i = 0; i < NBuffers; i++)
		{
			BufferDesc *buf = GetBufferDescriptor(i);

			CLEAR_BUFFERTAG(buf->tag);

			pg_atomic_init_u32(&buf->state, 0);
			buf->wait_backend_pid = 0;

			buf->buf_id = i;

			/*
			 * Initially link all the buffers together as unused. Subsequent
			 * management of this list is done by freelist.c.
			 */
			buf->freeNext = i + 1;

			LWLockInitialize(BufferDescriptorGetContentLock(buf),
							 LWTRANCHE_BUFFER_CONTENT);

			LWLockInitialize(BufferDescriptorGetIOLock(buf),
							 LWTRANCHE_BUFFER_IO_IN_PROGRESS);
		}

		/* Correct last entry of linked list */
		GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST;
	}

	/* Init other shared buffer-management stuff */
	StrategyInitialize(!foundDescs);

	/* Initialize per-backend file flush context */
	WritebackContextInit(&BackendWritebackContext,
						 &backend_flush_after);
}

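/*
 * For orientation: the shared arrays allocated above are parallel arrays
 * indexed by buffer id.  Roughly (see buf_internals.h and bufmgr.h for the
 * authoritative accessors), descriptor i, its data page, and its IO lock
 * are reached as in this sketch:
 *
 *		BufferDesc *desc   = GetBufferDescriptor(i);
 *		char	   *page   = BufferBlocks + (Size) i * BLCKSZ;
 *		LWLock	   *iolock = BufferDescriptorGetIOLock(desc);
 *
 * The off-by-one shift between a 0-based buffer id and the 1-based Buffer
 * handles handed to callers (Buffer 0 is InvalidBuffer) is handled inside
 * bufmgr.c.
 */
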
/*
 * BufferShmemSize
 *
 * compute the size of shared memory for the buffer pool including
 * data pages, buffer descriptors, hash tables, etc.
 */
Size
BufferShmemSize(void)
{
	Size		size = 0;

	/* size of buffer descriptors */
	size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded)));
	/* to allow aligning buffer descriptors */
	size = add_size(size, PG_CACHE_LINE_SIZE);

	/* size of data pages */
	size = add_size(size, mul_size(NBuffers, BLCKSZ));

	/* size of stuff controlled by freelist.c */
	size = add_size(size, StrategyShmemSize());

	/*
	 * It would be nice to include the I/O locks in the BufferDesc, but that
	 * would increase the size of a BufferDesc to more than one cache line,
	 * and benchmarking has shown that keeping every BufferDesc aligned on a
	 * cache line boundary is important for performance.  So, instead, the
	 * array of I/O locks is allocated in a separate tranche.  Because those
	 * locks are not highly contended, we lay out the array with minimal
	 * padding.
	 */
	size = add_size(size, mul_size(NBuffers, sizeof(LWLockMinimallyPadded)));
	/* to allow aligning the above */
	size = add_size(size, PG_CACHE_LINE_SIZE);

	/* size of checkpoint sort array in bufmgr.c */
	size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem)));

	return size;
}
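
/*
 * A back-of-the-envelope example of the arithmetic above (assuming default
 * build options; exact struct sizes vary by platform): with the default
 * BLCKSZ of 8192 and shared_buffers = 128MB, NBuffers is 16384, so the
 * data-page term alone is 16384 * 8192 bytes = 128MB.  The cache-line
 * padded descriptors (64 bytes each on common 64-bit builds) add about
 * 1MB, while the minimally padded I/O locks and the checkpoint sort
 * entries each contribute on the order of hundreds of kilobytes, plus
 * whatever StrategyShmemSize() reports for the buffer lookup hash table
 * and freelist state.
 */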