1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * shmem.c |
4 | * create shared memory and initialize shared memory data structures. |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * Portions Copyright (c) 1994, Regents of the University of California |
8 | * |
9 | * |
10 | * IDENTIFICATION |
11 | * src/backend/storage/ipc/shmem.c |
12 | * |
13 | *------------------------------------------------------------------------- |
14 | */ |
15 | /* |
16 | * POSTGRES processes share one or more regions of shared memory. |
17 | * The shared memory is created by a postmaster and is inherited |
18 | * by each backend via fork() (or, in some ports, via other OS-specific |
19 | * methods). The routines in this file are used for allocating and |
20 | * binding to shared memory data structures. |
21 | * |
22 | * NOTES: |
23 | * (a) There are three kinds of shared memory data structures |
24 | * available to POSTGRES: fixed-size structures, queues and hash |
25 | * tables. Fixed-size structures contain things like global variables |
26 | * for a module and should never be allocated after the shared memory |
27 | * initialization phase. Hash tables have a fixed maximum size, but |
28 | * their actual size can vary dynamically. When entries are added |
29 | * to the table, more space is allocated. Queues link data structures |
30 | * that have been allocated either within fixed-size structures or as hash |
31 | * buckets. Each shared data structure has a string name to identify |
32 | * it (assigned in the module that declares it). |
33 | * |
34 | * (b) During initialization, each module looks for its |
35 | * shared data structures in a hash table called the "Shmem Index". |
36 | * If the data structure is not present, the caller can allocate |
37 | * a new one and initialize it. If the data structure is present, |
38 | * the caller "attaches" to the structure by initializing a pointer |
39 | * in the local address space. |
40 | * The shmem index has two purposes: first, it gives us |
41 | * a simple model of how the world looks when a backend process |
42 | * initializes. If something is present in the shmem index, |
43 | * it is initialized. If it is not, it is uninitialized. Second, |
44 | * the shmem index allows us to allocate shared memory on demand |
45 | * instead of trying to preallocate structures and hard-wire the |
46 | * sizes and locations in header files. If you are using a lot |
47 | * of shared memory in a lot of different places (and changing |
48 | * things during development), this is important. |
49 | * |
50 | * (c) In standard Unix-ish environments, individual backends do not |
51 | * need to re-establish their local pointers into shared memory, because |
52 | * they inherit correct values of those variables via fork() from the |
53 | * postmaster. However, this does not work in the EXEC_BACKEND case. |
54 | * In ports using EXEC_BACKEND, new backends have to set up their local |
55 | * pointers using the method described in (b) above. |
56 | * |
57 | * (d) memory allocation model: shared memory can never be |
58 | * freed, once allocated. Each hash table has its own free list, |
59 | * so hash buckets can be reused when an item is deleted. However, |
60 | * if one hash table grows very large and then shrinks, its space |
61 | * cannot be redistributed to other tables. We could build a simple |
62 | * hash bucket garbage collector if need be. Right now, it seems |
63 | * unnecessary. |
64 | */ |
65 | |
66 | #include "postgres.h" |
67 | |
68 | #include "access/transam.h" |
69 | #include "miscadmin.h" |
70 | #include "storage/lwlock.h" |
71 | #include "storage/pg_shmem.h" |
72 | #include "storage/shmem.h" |
73 | #include "storage/spin.h" |
74 | |
75 | |
76 | /* shared memory global variables */ |
77 | |
78 | static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */ |
79 | |
80 | static void *ShmemBase; /* start address of shared memory */ |
81 | |
82 | static void *ShmemEnd; /* end+1 address of shared memory */ |
83 | |
84 | slock_t *ShmemLock; /* spinlock for shared memory and LWLock |
85 | * allocation */ |
86 | |
87 | static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */ |
88 | |
89 | |
90 | /* |
91 | * InitShmemAccess() --- set up basic pointers to shared memory. |
92 | * |
93 | * Note: the argument should be declared "PGShmemHeader *seghdr", |
94 | * but we use void to avoid having to include ipc.h in shmem.h. |
95 | */ |
96 | void |
97 | InitShmemAccess(void *seghdr) |
98 | { |
99 | PGShmemHeader *shmhdr = (PGShmemHeader *) seghdr; |
100 | |
101 | ShmemSegHdr = shmhdr; |
102 | ShmemBase = (void *) shmhdr; |
103 | ShmemEnd = (char *) ShmemBase + shmhdr->totalsize; |
104 | } |
105 | |
106 | /* |
107 | * InitShmemAllocation() --- set up shared-memory space allocation. |
108 | * |
109 | * This should be called only in the postmaster or a standalone backend. |
110 | */ |
111 | void |
112 | InitShmemAllocation(void) |
113 | { |
114 | PGShmemHeader *shmhdr = ShmemSegHdr; |
115 | char *aligned; |
116 | |
117 | Assert(shmhdr != NULL); |
118 | |
119 | /* |
120 | * Initialize the spinlock used by ShmemAlloc. We must use |
121 | * ShmemAllocUnlocked, since obviously ShmemAlloc can't be called yet. |
122 | */ |
123 | ShmemLock = (slock_t *) ShmemAllocUnlocked(sizeof(slock_t)); |
124 | |
125 | SpinLockInit(ShmemLock); |
126 | |
127 | /* |
128 | * Allocations after this point should go through ShmemAlloc, which |
129 | * expects to allocate everything on cache line boundaries. Make sure the |
130 | * first allocation begins on a cache line boundary. |
131 | */ |
132 | aligned = (char *) |
133 | (CACHELINEALIGN((((char *) shmhdr) + shmhdr->freeoffset))); |
134 | shmhdr->freeoffset = aligned - (char *) shmhdr; |
135 | |
136 | /* ShmemIndex can't be set up yet (need LWLocks first) */ |
137 | shmhdr->index = NULL; |
138 | ShmemIndex = (HTAB *) NULL; |
139 | |
140 | /* |
141 | * Initialize ShmemVariableCache for transaction manager. (This doesn't |
142 | * really belong here, but not worth moving.) |
143 | */ |
144 | ShmemVariableCache = (VariableCache) |
145 | ShmemAlloc(sizeof(*ShmemVariableCache)); |
146 | memset(ShmemVariableCache, 0, sizeof(*ShmemVariableCache)); |
147 | } |
148 | |
149 | /* |
150 | * ShmemAlloc -- allocate max-aligned chunk from shared memory |
151 | * |
152 | * Throws error if request cannot be satisfied. |
153 | * |
154 | * Assumes ShmemLock and ShmemSegHdr are initialized. |
155 | */ |
156 | void * |
157 | ShmemAlloc(Size size) |
158 | { |
159 | void *newSpace; |
160 | |
161 | newSpace = ShmemAllocNoError(size); |
162 | if (!newSpace) |
163 | ereport(ERROR, |
164 | (errcode(ERRCODE_OUT_OF_MEMORY), |
165 | errmsg("out of shared memory (%zu bytes requested)" , |
166 | size))); |
167 | return newSpace; |
168 | } |
169 | |
170 | /* |
171 | * ShmemAllocNoError -- allocate max-aligned chunk from shared memory |
172 | * |
173 | * As ShmemAlloc, but returns NULL if out of space, rather than erroring. |
174 | */ |
175 | void * |
176 | ShmemAllocNoError(Size size) |
177 | { |
178 | Size newStart; |
179 | Size newFree; |
180 | void *newSpace; |
181 | |
182 | /* |
183 | * Ensure all space is adequately aligned. We used to only MAXALIGN this |
184 | * space but experience has proved that on modern systems that is not good |
185 | * enough. Many parts of the system are very sensitive to critical data |
186 | * structures getting split across cache line boundaries. To avoid that, |
187 | * attempt to align the beginning of the allocation to a cache line |
188 | * boundary. The calling code will still need to be careful about how it |
189 | * uses the allocated space - e.g. by padding each element in an array of |
190 | * structures out to a power-of-two size - but without this, even that |
191 | * won't be sufficient. |
192 | */ |
193 | size = CACHELINEALIGN(size); |
194 | |
195 | Assert(ShmemSegHdr != NULL); |
196 | |
197 | SpinLockAcquire(ShmemLock); |
198 | |
199 | newStart = ShmemSegHdr->freeoffset; |
200 | |
201 | newFree = newStart + size; |
202 | if (newFree <= ShmemSegHdr->totalsize) |
203 | { |
204 | newSpace = (void *) ((char *) ShmemBase + newStart); |
205 | ShmemSegHdr->freeoffset = newFree; |
206 | } |
207 | else |
208 | newSpace = NULL; |
209 | |
210 | SpinLockRelease(ShmemLock); |
211 | |
212 | /* note this assert is okay with newSpace == NULL */ |
213 | Assert(newSpace == (void *) CACHELINEALIGN(newSpace)); |
214 | |
215 | return newSpace; |
216 | } |
217 | |
218 | /* |
219 | * ShmemAllocUnlocked -- allocate max-aligned chunk from shared memory |
220 | * |
221 | * Allocate space without locking ShmemLock. This should be used for, |
222 | * and only for, allocations that must happen before ShmemLock is ready. |
223 | * |
224 | * We consider maxalign, rather than cachealign, sufficient here. |
225 | */ |
226 | void * |
227 | ShmemAllocUnlocked(Size size) |
228 | { |
229 | Size newStart; |
230 | Size newFree; |
231 | void *newSpace; |
232 | |
233 | /* |
234 | * Ensure allocated space is adequately aligned. |
235 | */ |
236 | size = MAXALIGN(size); |
237 | |
238 | Assert(ShmemSegHdr != NULL); |
239 | |
240 | newStart = ShmemSegHdr->freeoffset; |
241 | |
242 | newFree = newStart + size; |
243 | if (newFree > ShmemSegHdr->totalsize) |
244 | ereport(ERROR, |
245 | (errcode(ERRCODE_OUT_OF_MEMORY), |
246 | errmsg("out of shared memory (%zu bytes requested)" , |
247 | size))); |
248 | ShmemSegHdr->freeoffset = newFree; |
249 | |
250 | newSpace = (void *) ((char *) ShmemBase + newStart); |
251 | |
252 | Assert(newSpace == (void *) MAXALIGN(newSpace)); |
253 | |
254 | return newSpace; |
255 | } |
256 | |
257 | /* |
258 | * ShmemAddrIsValid -- test if an address refers to shared memory |
259 | * |
260 | * Returns true if the pointer points within the shared memory segment. |
261 | */ |
262 | bool |
263 | ShmemAddrIsValid(const void *addr) |
264 | { |
265 | return (addr >= ShmemBase) && (addr < ShmemEnd); |
266 | } |
267 | |
268 | /* |
269 | * InitShmemIndex() --- set up or attach to shmem index table. |
270 | */ |
271 | void |
272 | InitShmemIndex(void) |
273 | { |
274 | HASHCTL info; |
275 | int hash_flags; |
276 | |
277 | /* |
278 | * Create the shared memory shmem index. |
279 | * |
280 | * Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex |
281 | * hashtable to exist already, we have a bit of a circularity problem in |
282 | * initializing the ShmemIndex itself. The special "ShmemIndex" hash |
283 | * table name will tell ShmemInitStruct to fake it. |
284 | */ |
285 | info.keysize = SHMEM_INDEX_KEYSIZE; |
286 | info.entrysize = sizeof(ShmemIndexEnt); |
287 | hash_flags = HASH_ELEM; |
288 | |
289 | ShmemIndex = ShmemInitHash("ShmemIndex" , |
290 | SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE, |
291 | &info, hash_flags); |
292 | } |
293 | |
294 | /* |
295 | * ShmemInitHash -- Create and initialize, or attach to, a |
296 | * shared memory hash table. |
297 | * |
298 | * We assume caller is doing some kind of synchronization |
299 | * so that two processes don't try to create/initialize the same |
300 | * table at once. (In practice, all creations are done in the postmaster |
301 | * process; child processes should always be attaching to existing tables.) |
302 | * |
303 | * max_size is the estimated maximum number of hashtable entries. This is |
304 | * not a hard limit, but the access efficiency will degrade if it is |
305 | * exceeded substantially (since it's used to compute directory size and |
306 | * the hash table buckets will get overfull). |
307 | * |
308 | * init_size is the number of hashtable entries to preallocate. For a table |
309 | * whose maximum size is certain, this should be equal to max_size; that |
310 | * ensures that no run-time out-of-shared-memory failures can occur. |
311 | * |
312 | * Note: before Postgres 9.0, this function returned NULL for some failure |
313 | * cases. Now, it always throws error instead, so callers need not check |
314 | * for NULL. |
315 | */ |
316 | HTAB * |
317 | ShmemInitHash(const char *name, /* table string name for shmem index */ |
318 | long init_size, /* initial table size */ |
319 | long max_size, /* max size of the table */ |
320 | HASHCTL *infoP, /* info about key and bucket size */ |
321 | int hash_flags) /* info about infoP */ |
322 | { |
323 | bool found; |
324 | void *location; |
325 | |
326 | /* |
327 | * Hash tables allocated in shared memory have a fixed directory; it can't |
328 | * grow or other backends wouldn't be able to find it. So, make sure we |
329 | * make it big enough to start with. |
330 | * |
331 | * The shared memory allocator must be specified too. |
332 | */ |
333 | infoP->dsize = infoP->max_dsize = hash_select_dirsize(max_size); |
334 | infoP->alloc = ShmemAllocNoError; |
335 | hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE; |
336 | |
337 | /* look it up in the shmem index */ |
338 | location = ShmemInitStruct(name, |
339 | hash_get_shared_size(infoP, hash_flags), |
340 | &found); |
341 | |
342 | /* |
343 | * if it already exists, attach to it rather than allocate and initialize |
344 | * new space |
345 | */ |
346 | if (found) |
347 | hash_flags |= HASH_ATTACH; |
348 | |
349 | /* Pass location of hashtable header to hash_create */ |
350 | infoP->hctl = (HASHHDR *) location; |
351 | |
352 | return hash_create(name, init_size, infoP, hash_flags); |
353 | } |
354 | |
355 | /* |
356 | * ShmemInitStruct -- Create/attach to a structure in shared memory. |
357 | * |
358 | * This is called during initialization to find or allocate |
359 | * a data structure in shared memory. If no other process |
360 | * has created the structure, this routine allocates space |
361 | * for it. If it exists already, a pointer to the existing |
362 | * structure is returned. |
363 | * |
364 | * Returns: pointer to the object. *foundPtr is set true if the object was |
365 | * already in the shmem index (hence, already initialized). |
366 | * |
367 | * Note: before Postgres 9.0, this function returned NULL for some failure |
368 | * cases. Now, it always throws error instead, so callers need not check |
369 | * for NULL. |
370 | */ |
371 | void * |
372 | ShmemInitStruct(const char *name, Size size, bool *foundPtr) |
373 | { |
374 | ShmemIndexEnt *result; |
375 | void *structPtr; |
376 | |
377 | LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE); |
378 | |
379 | if (!ShmemIndex) |
380 | { |
381 | PGShmemHeader *shmemseghdr = ShmemSegHdr; |
382 | |
383 | /* Must be trying to create/attach to ShmemIndex itself */ |
384 | Assert(strcmp(name, "ShmemIndex" ) == 0); |
385 | |
386 | if (IsUnderPostmaster) |
387 | { |
388 | /* Must be initializing a (non-standalone) backend */ |
389 | Assert(shmemseghdr->index != NULL); |
390 | structPtr = shmemseghdr->index; |
391 | *foundPtr = true; |
392 | } |
393 | else |
394 | { |
395 | /* |
396 | * If the shmem index doesn't exist, we are bootstrapping: we must |
397 | * be trying to init the shmem index itself. |
398 | * |
399 | * Notice that the ShmemIndexLock is released before the shmem |
400 | * index has been initialized. This should be OK because no other |
401 | * process can be accessing shared memory yet. |
402 | */ |
403 | Assert(shmemseghdr->index == NULL); |
404 | structPtr = ShmemAlloc(size); |
405 | shmemseghdr->index = structPtr; |
406 | *foundPtr = false; |
407 | } |
408 | LWLockRelease(ShmemIndexLock); |
409 | return structPtr; |
410 | } |
411 | |
412 | /* look it up in the shmem index */ |
413 | result = (ShmemIndexEnt *) |
414 | hash_search(ShmemIndex, name, HASH_ENTER_NULL, foundPtr); |
415 | |
416 | if (!result) |
417 | { |
418 | LWLockRelease(ShmemIndexLock); |
419 | ereport(ERROR, |
420 | (errcode(ERRCODE_OUT_OF_MEMORY), |
421 | errmsg("could not create ShmemIndex entry for data structure \"%s\"" , |
422 | name))); |
423 | } |
424 | |
425 | if (*foundPtr) |
426 | { |
427 | /* |
428 | * Structure is in the shmem index so someone else has allocated it |
429 | * already. The size better be the same as the size we are trying to |
430 | * initialize to, or there is a name conflict (or worse). |
431 | */ |
432 | if (result->size != size) |
433 | { |
434 | LWLockRelease(ShmemIndexLock); |
435 | ereport(ERROR, |
436 | (errmsg("ShmemIndex entry size is wrong for data structure" |
437 | " \"%s\": expected %zu, actual %zu" , |
438 | name, size, result->size))); |
439 | } |
440 | structPtr = result->location; |
441 | } |
442 | else |
443 | { |
444 | /* It isn't in the table yet. allocate and initialize it */ |
445 | structPtr = ShmemAllocNoError(size); |
446 | if (structPtr == NULL) |
447 | { |
448 | /* out of memory; remove the failed ShmemIndex entry */ |
449 | hash_search(ShmemIndex, name, HASH_REMOVE, NULL); |
450 | LWLockRelease(ShmemIndexLock); |
451 | ereport(ERROR, |
452 | (errcode(ERRCODE_OUT_OF_MEMORY), |
453 | errmsg("not enough shared memory for data structure" |
454 | " \"%s\" (%zu bytes requested)" , |
455 | name, size))); |
456 | } |
457 | result->size = size; |
458 | result->location = structPtr; |
459 | } |
460 | |
461 | LWLockRelease(ShmemIndexLock); |
462 | |
463 | Assert(ShmemAddrIsValid(structPtr)); |
464 | |
465 | Assert(structPtr == (void *) CACHELINEALIGN(structPtr)); |
466 | |
467 | return structPtr; |
468 | } |
469 | |
470 | |
471 | /* |
472 | * Add two Size values, checking for overflow |
473 | */ |
474 | Size |
475 | add_size(Size s1, Size s2) |
476 | { |
477 | Size result; |
478 | |
479 | result = s1 + s2; |
480 | /* We are assuming Size is an unsigned type here... */ |
481 | if (result < s1 || result < s2) |
482 | ereport(ERROR, |
483 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
484 | errmsg("requested shared memory size overflows size_t" ))); |
485 | return result; |
486 | } |
487 | |
488 | /* |
489 | * Multiply two Size values, checking for overflow |
490 | */ |
491 | Size |
492 | mul_size(Size s1, Size s2) |
493 | { |
494 | Size result; |
495 | |
496 | if (s1 == 0 || s2 == 0) |
497 | return 0; |
498 | result = s1 * s2; |
499 | /* We are assuming Size is an unsigned type here... */ |
500 | if (result / s2 != s1) |
501 | ereport(ERROR, |
502 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
503 | errmsg("requested shared memory size overflows size_t" ))); |
504 | return result; |
505 | } |
506 | |