/*-------------------------------------------------------------------------
 *
 * localbuf.c
 *    local buffer manager. Fast buffer manager for temporary tables,
 *    which never need to be WAL-logged or checkpointed, etc.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994-5, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/storage/buffer/localbuf.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/parallel.h"
#include "catalog/catalog.h"
#include "executor/instrument.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/resowner_private.h"


/*#define LBDEBUG*/

/* entry for buffer lookup hashtable */
typedef struct
{
    BufferTag   key;            /* Tag of a disk page */
    int         id;             /* Associated local buffer's index */
} LocalBufferLookupEnt;

/* Note: this macro only works on local buffers, not shared ones! */
#define LocalBufHdrGetBlock(bufHdr) \
    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
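
/*
 * Illustrative example only: InitLocalBuffers assigns buf_id = -i - 2 to the
 * i'th local buffer, so this macro recovers the array index i; e.g. buf_id -2
 * maps to LocalBufferBlockPointers[0] and buf_id -5 to
 * LocalBufferBlockPointers[3].
 */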

int         NLocBuffer = 0;     /* until buffers are initialized */

BufferDesc *LocalBufferDescriptors = NULL;
Block      *LocalBufferBlockPointers = NULL;
int32      *LocalRefCount = NULL;

static int  nextFreeLocalBuf = 0;

static HTAB *LocalBufHash = NULL;


static void InitLocalBuffers(void);
static Block GetLocalBufferStorage(void);


/*
 * LocalPrefetchBuffer -
 *    initiate asynchronous read of a block of a relation
 *
 * Do PrefetchBuffer's work for temporary relations.
 * No-op if prefetching isn't compiled in.
 */
void
LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
                    BlockNumber blockNum)
{
#ifdef USE_PREFETCH
    BufferTag   newTag;         /* identity of requested block */
    LocalBufferLookupEnt *hresult;

    INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);

    /* Initialize local buffers if first request in this session */
    if (LocalBufHash == NULL)
        InitLocalBuffers();

    /* See if the desired buffer already exists */
    hresult = (LocalBufferLookupEnt *)
        hash_search(LocalBufHash, (void *) &newTag, HASH_FIND, NULL);

    if (hresult)
    {
        /* Yes, so nothing to do */
        return;
    }

    /* Not in buffers, so initiate prefetch */
    smgrprefetch(smgr, forkNum, blockNum);
#endif                          /* USE_PREFETCH */
}


/*
 * LocalBufferAlloc -
 *    Find or create a local buffer for the given page of the given relation.
 *
 * API is similar to bufmgr.c's BufferAlloc, except that we do not need
 * to do any locking since this is all local.  Also, IO_IN_PROGRESS
 * does not get set.  Lastly, we support only default access strategy
 * (hence, usage_count is always advanced).
 */
BufferDesc *
LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
                 bool *foundPtr)
{
    BufferTag   newTag;         /* identity of requested block */
    LocalBufferLookupEnt *hresult;
    BufferDesc *bufHdr;
    int         b;
    int         trycounter;
    bool        found;
    uint32      buf_state;

    INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);

    /* Initialize local buffers if first request in this session */
    if (LocalBufHash == NULL)
        InitLocalBuffers();

    /* See if the desired buffer already exists */
    hresult = (LocalBufferLookupEnt *)
        hash_search(LocalBufHash, (void *) &newTag, HASH_FIND, NULL);

    if (hresult)
    {
        b = hresult->id;
        bufHdr = GetLocalBufferDescriptor(b);
        Assert(BUFFERTAGS_EQUAL(bufHdr->tag, newTag));
#ifdef LBDEBUG
        fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
                smgr->smgr_rnode.node.relNode, forkNum, blockNum, -b - 1);
#endif
        buf_state = pg_atomic_read_u32(&bufHdr->state);

        /* this part is equivalent to PinBuffer for a shared buffer */
        if (LocalRefCount[b] == 0)
        {
            if (BUF_STATE_GET_USAGECOUNT(buf_state) < BM_MAX_USAGE_COUNT)
            {
                buf_state += BUF_USAGECOUNT_ONE;
                pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
            }
        }
        LocalRefCount[b]++;
        ResourceOwnerRememberBuffer(CurrentResourceOwner,
                                    BufferDescriptorGetBuffer(bufHdr));
        if (buf_state & BM_VALID)
            *foundPtr = true;
        else
        {
            /* Previous read attempt must have failed; try again */
            *foundPtr = false;
        }
        return bufHdr;
    }

#ifdef LBDEBUG
    fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
            smgr->smgr_rnode.node.relNode, forkNum, blockNum,
            -nextFreeLocalBuf - 1);
#endif

    /*
     * Need to get a new buffer.  We use a clock sweep algorithm (essentially
     * the same as what freelist.c does now...)
     */
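    /*
     * (For orientation: each pass over an unpinned buffer decrements its
     * usage_count; a buffer is claimed once it is found unpinned with a zero
     * usage_count.  trycounter is reset whenever a usage_count is
     * decremented, so the "no empty local buffer" error below can only be
     * reached when every local buffer is pinned.)
     */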
    trycounter = NLocBuffer;
    for (;;)
    {
        b = nextFreeLocalBuf;

        if (++nextFreeLocalBuf >= NLocBuffer)
            nextFreeLocalBuf = 0;

        bufHdr = GetLocalBufferDescriptor(b);

        if (LocalRefCount[b] == 0)
        {
            buf_state = pg_atomic_read_u32(&bufHdr->state);

            if (BUF_STATE_GET_USAGECOUNT(buf_state) > 0)
            {
                buf_state -= BUF_USAGECOUNT_ONE;
                pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
                trycounter = NLocBuffer;
            }
            else
            {
                /* Found a usable buffer */
                LocalRefCount[b]++;
                ResourceOwnerRememberBuffer(CurrentResourceOwner,
                                            BufferDescriptorGetBuffer(bufHdr));
                break;
            }
        }
        else if (--trycounter == 0)
            ereport(ERROR,
                    (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
                     errmsg("no empty local buffer available")));
    }

    /*
     * this buffer is not referenced but it might still be dirty. if that's
     * the case, write it out before reusing it!
     */
    if (buf_state & BM_DIRTY)
    {
        SMgrRelation oreln;
        Page        localpage = (char *) LocalBufHdrGetBlock(bufHdr);

        /* Find smgr relation for buffer */
        oreln = smgropen(bufHdr->tag.rnode, MyBackendId);

        PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);

        /* And write... */
        smgrwrite(oreln,
                  bufHdr->tag.forkNum,
                  bufHdr->tag.blockNum,
                  localpage,
                  false);

        /* Mark not-dirty now in case we error out below */
        buf_state &= ~BM_DIRTY;
        pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);

        pgBufferUsage.local_blks_written++;
    }

    /*
     * lazy memory allocation: allocate space on first use of a buffer.
     */
    if (LocalBufHdrGetBlock(bufHdr) == NULL)
    {
        /* Set pointer for use by BufferGetBlock() macro */
        LocalBufHdrGetBlock(bufHdr) = GetLocalBufferStorage();
    }

    /*
     * Update the hash table: remove old entry, if any, and make new one.
     */
    if (buf_state & BM_TAG_VALID)
    {
        hresult = (LocalBufferLookupEnt *)
            hash_search(LocalBufHash, (void *) &bufHdr->tag,
                        HASH_REMOVE, NULL);
        if (!hresult)           /* shouldn't happen */
            elog(ERROR, "local buffer hash table corrupted");
        /* mark buffer invalid just in case hash insert fails */
        CLEAR_BUFFERTAG(bufHdr->tag);
        buf_state &= ~(BM_VALID | BM_TAG_VALID);
        pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
    }

    hresult = (LocalBufferLookupEnt *)
        hash_search(LocalBufHash, (void *) &newTag, HASH_ENTER, &found);
    if (found)                  /* shouldn't happen */
        elog(ERROR, "local buffer hash table corrupted");
    hresult->id = b;

    /*
     * it's all ours now.
     */
    bufHdr->tag = newTag;
    buf_state &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
    buf_state |= BM_TAG_VALID;
    buf_state &= ~BUF_USAGECOUNT_MASK;
    buf_state += BUF_USAGECOUNT_ONE;
    pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);

    *foundPtr = false;
    return bufHdr;
}
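
/*
 * For orientation only: a rough sketch of how a caller is expected to use
 * LocalBufferAlloc (ReadBuffer_common in bufmgr.c follows this pattern for
 * temporary relations).  Not a usable entry point by itself:
 *
 *		bool		found;
 *		BufferDesc *bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
 *
 *		if (!found)
 *			... read the page into LocalBufHdrGetBlock(bufHdr) via smgrread()
 *			... and then set BM_VALID in bufHdr->state
 *
 *		return BufferDescriptorGetBuffer(bufHdr);
 */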

/*
 * MarkLocalBufferDirty -
 *    mark a local buffer dirty
 */
void
MarkLocalBufferDirty(Buffer buffer)
{
    int         bufid;
    BufferDesc *bufHdr;
    uint32      buf_state;

    Assert(BufferIsLocal(buffer));

#ifdef LBDEBUG
    fprintf(stderr, "LB DIRTY %d\n", buffer);
#endif

    bufid = -(buffer + 1);
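    /*
     * (Illustrative: local buffer index 0 is exposed as Buffer -1, index 1 as
     * Buffer -2, and so on, so -(buffer + 1) maps the Buffer number back to
     * the array index.)
     */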

    Assert(LocalRefCount[bufid] > 0);

    bufHdr = GetLocalBufferDescriptor(bufid);

    buf_state = pg_atomic_read_u32(&bufHdr->state);

    if (!(buf_state & BM_DIRTY))
        pgBufferUsage.local_blks_dirtied++;

    buf_state |= BM_DIRTY;

    pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
}

/*
 * DropRelFileNodeLocalBuffers
 *      This function removes from the buffer pool all the pages of the
 *      specified relation that have block numbers >= firstDelBlock.
 *      (In particular, with firstDelBlock = 0, all pages are removed.)
 *      Dirty pages are simply dropped, without bothering to write them
 *      out first.  Therefore, this is NOT rollback-able, and so should be
 *      used only with extreme caution!
 *
 * See DropRelFileNodeBuffers in bufmgr.c for more notes.
 */
void
DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
                            BlockNumber firstDelBlock)
{
    int         i;

    for (i = 0; i < NLocBuffer; i++)
    {
        BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
        LocalBufferLookupEnt *hresult;
        uint32      buf_state;

        buf_state = pg_atomic_read_u32(&bufHdr->state);

        if ((buf_state & BM_TAG_VALID) &&
            RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
            bufHdr->tag.forkNum == forkNum &&
            bufHdr->tag.blockNum >= firstDelBlock)
        {
            if (LocalRefCount[i] != 0)
                elog(ERROR, "block %u of %s is still referenced (local %u)",
                     bufHdr->tag.blockNum,
                     relpathbackend(bufHdr->tag.rnode, MyBackendId,
                                    bufHdr->tag.forkNum),
                     LocalRefCount[i]);
            /* Remove entry from hashtable */
            hresult = (LocalBufferLookupEnt *)
                hash_search(LocalBufHash, (void *) &bufHdr->tag,
                            HASH_REMOVE, NULL);
            if (!hresult)       /* shouldn't happen */
                elog(ERROR, "local buffer hash table corrupted");
            /* Mark buffer invalid */
            CLEAR_BUFFERTAG(bufHdr->tag);
            buf_state &= ~BUF_FLAG_MASK;
            buf_state &= ~BUF_USAGECOUNT_MASK;
            pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
        }
    }
}

/*
 * DropRelFileNodeAllLocalBuffers
 *      This function removes from the buffer pool all pages of all forks
 *      of the specified relation.
 *
 * See DropRelFileNodeAllBuffers in bufmgr.c for more notes.
 */
void
DropRelFileNodeAllLocalBuffers(RelFileNode rnode)
{
    int         i;

    for (i = 0; i < NLocBuffer; i++)
    {
        BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
        LocalBufferLookupEnt *hresult;
        uint32      buf_state;

        buf_state = pg_atomic_read_u32(&bufHdr->state);

        if ((buf_state & BM_TAG_VALID) &&
            RelFileNodeEquals(bufHdr->tag.rnode, rnode))
        {
            if (LocalRefCount[i] != 0)
                elog(ERROR, "block %u of %s is still referenced (local %u)",
                     bufHdr->tag.blockNum,
                     relpathbackend(bufHdr->tag.rnode, MyBackendId,
                                    bufHdr->tag.forkNum),
                     LocalRefCount[i]);
            /* Remove entry from hashtable */
            hresult = (LocalBufferLookupEnt *)
                hash_search(LocalBufHash, (void *) &bufHdr->tag,
                            HASH_REMOVE, NULL);
            if (!hresult)       /* shouldn't happen */
                elog(ERROR, "local buffer hash table corrupted");
            /* Mark buffer invalid */
            CLEAR_BUFFERTAG(bufHdr->tag);
            buf_state &= ~BUF_FLAG_MASK;
            buf_state &= ~BUF_USAGECOUNT_MASK;
            pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
        }
    }
}

/*
 * InitLocalBuffers -
 *    init the local buffer cache. Since most queries (esp. multi-user ones)
 *    don't involve local buffers, we delay allocating actual memory for the
 *    buffers until we need them; just make the buffer headers here.
 */
static void
InitLocalBuffers(void)
{
    int         nbufs = num_temp_buffers;
    HASHCTL     info;
    int         i;

    /*
     * Parallel workers can't access data in temporary tables, because they
     * have no visibility into the local buffers of their leader.  This is a
     * convenient, low-cost place to provide a backstop check for that.  Note
     * that we don't wish to prevent a parallel worker from accessing catalog
     * metadata about a temp table, so checks at higher levels would be
     * inappropriate.
     */
    if (IsParallelWorker())
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
                 errmsg("cannot access temporary tables during a parallel operation")));

    /* Allocate and zero buffer headers and auxiliary arrays */
    LocalBufferDescriptors = (BufferDesc *) calloc(nbufs, sizeof(BufferDesc));
    LocalBufferBlockPointers = (Block *) calloc(nbufs, sizeof(Block));
    LocalRefCount = (int32 *) calloc(nbufs, sizeof(int32));
    if (!LocalBufferDescriptors || !LocalBufferBlockPointers || !LocalRefCount)
        ereport(FATAL,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of memory")));

    nextFreeLocalBuf = 0;

    /* initialize fields that need to start off nonzero */
    for (i = 0; i < nbufs; i++)
    {
        BufferDesc *buf = GetLocalBufferDescriptor(i);

        /*
         * negative to indicate local buffer. This is tricky: shared buffers
         * start with 0. We have to start with -2. (Note that the routine
         * BufferDescriptorGetBuffer adds 1 to buf_id so our first buffer id
         * is -1.)
         */
        buf->buf_id = -i - 2;
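        /*
         * Illustrative only: i = 0 gets buf_id -2, which
         * BufferDescriptorGetBuffer turns into Buffer -1; i = 1 gets buf_id
         * -3 / Buffer -2; and so on.
         */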

        /*
         * Intentionally do not initialize the buffer's atomic variable
         * (besides zeroing the underlying memory above). That way we get
         * errors on platforms without atomics, if somebody (re-)introduces
         * atomic operations for local buffers.
         */
    }

    /* Create the lookup hash table */
    MemSet(&info, 0, sizeof(info));
    info.keysize = sizeof(BufferTag);
    info.entrysize = sizeof(LocalBufferLookupEnt);

    LocalBufHash = hash_create("Local Buffer Lookup Table",
                               nbufs,
                               &info,
                               HASH_ELEM | HASH_BLOBS);

    if (!LocalBufHash)
        elog(ERROR, "could not initialize local buffer hash table");

    /* Initialization done, mark buffers allocated */
    NLocBuffer = nbufs;
}

/*
 * GetLocalBufferStorage - allocate memory for a local buffer
 *
 * The idea of this function is to aggregate our requests for storage
 * so that the memory manager doesn't see a whole lot of relatively small
 * requests.  Since we'll never give back a local buffer once it's created
 * within a particular process, no point in burdening memmgr with separately
 * managed chunks.
 */
static Block
GetLocalBufferStorage(void)
{
    static char *cur_block = NULL;
    static int  next_buf_in_block = 0;
    static int  num_bufs_in_block = 0;
    static int  total_bufs_allocated = 0;
    static MemoryContext LocalBufferContext = NULL;

    char       *this_buf;

    Assert(total_bufs_allocated < NLocBuffer);

    if (next_buf_in_block >= num_bufs_in_block)
    {
        /* Need to make a new request to memmgr */
        int         num_bufs;

        /*
         * We allocate local buffers in a context of their own, so that the
         * space eaten for them is easily recognizable in MemoryContextStats
         * output.  Create the context on first use.
         */
        if (LocalBufferContext == NULL)
            LocalBufferContext =
                AllocSetContextCreate(TopMemoryContext,
                                      "LocalBufferContext",
                                      ALLOCSET_DEFAULT_SIZES);

        /* Start with a 16-buffer request; subsequent ones double each time */
        num_bufs = Max(num_bufs_in_block * 2, 16);
        /* But not more than what we need for all remaining local bufs */
        num_bufs = Min(num_bufs, NLocBuffer - total_bufs_allocated);
        /* And don't overflow MaxAllocSize, either */
        num_bufs = Min(num_bufs, MaxAllocSize / BLCKSZ);
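        /*
         * Worked example, for illustration: with the default temp_buffers of
         * 1024 and the default 8kB BLCKSZ, a backend that eventually touches
         * every buffer makes requests of 16, 32, 64, 128, 256 and 512 blocks
         * (1008 total), then a final request of 16 for the remainder; the
         * MaxAllocSize clamp never binds at that block size.
         */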

        cur_block = (char *) MemoryContextAlloc(LocalBufferContext,
                                                num_bufs * BLCKSZ);
        next_buf_in_block = 0;
        num_bufs_in_block = num_bufs;
    }

    /* Allocate next buffer in current memory block */
    this_buf = cur_block + next_buf_in_block * BLCKSZ;
    next_buf_in_block++;
    total_bufs_allocated++;

    return (Block) this_buf;
}

/*
 * CheckForLocalBufferLeaks - ensure this backend holds no local buffer pins
 *
 * This is just like CheckForBufferLeaks(), but for local buffers.
 */
static void
CheckForLocalBufferLeaks(void)
{
#ifdef USE_ASSERT_CHECKING
    if (LocalRefCount)
    {
        int         RefCountErrors = 0;
        int         i;

        for (i = 0; i < NLocBuffer; i++)
        {
            if (LocalRefCount[i] != 0)
            {
                Buffer      b = -i - 1;

                PrintBufferLeakWarning(b);
                RefCountErrors++;
            }
        }
        Assert(RefCountErrors == 0);
    }
#endif
}

/*
 * AtEOXact_LocalBuffers - clean up at end of transaction.
 *
 * This is just like AtEOXact_Buffers, but for local buffers.
 */
void
AtEOXact_LocalBuffers(bool isCommit)
{
    CheckForLocalBufferLeaks();
}

/*
 * AtProcExit_LocalBuffers - ensure we have dropped pins during backend exit.
 *
 * This is just like AtProcExit_Buffers, but for local buffers.
 */
void
AtProcExit_LocalBuffers(void)
{
    /*
     * We shouldn't be holding any remaining pins; if we are, and assertions
     * aren't enabled, we'll fail later in DropRelFileNodeBuffers while trying
     * to drop the temp rels.
     */
    CheckForLocalBufferLeaks();
}