/*-------------------------------------------------------------------------
 *
 * freelist.c
 *	  routines for managing the buffer pool's replacement strategy.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/buffer/freelist.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "port/atomics.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/proc.h"
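
/*
 * Read the given variable exactly once, by going through a volatile cast.
 * This lets code below (see StrategyGetBuffer) sample shared fields such as
 * bgwprocno without taking buffer_strategy_lock, while keeping the compiler
 * from rereading the value behind our back.
 */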
#define INT_ACCESS_ONCE(var) ((int)(*((volatile int *)&(var))))


/*
 * The shared freelist control information.
 */
typedef struct
{
    /* Spinlock: protects the values below */
    slock_t     buffer_strategy_lock;

    /*
     * Clock sweep hand: index of next buffer to consider grabbing. Note that
     * this isn't a concrete buffer - we only ever increase the value. So, to
     * get an actual buffer, it needs to be used modulo NBuffers.
     */
    pg_atomic_uint32 nextVictimBuffer;

    int         firstFreeBuffer;    /* Head of list of unused buffers */
    int         lastFreeBuffer;     /* Tail of list of unused buffers */

    /*
     * NOTE: lastFreeBuffer is undefined when firstFreeBuffer is -1 (that is,
     * when the list is empty)
     */

    /*
     * Statistics. These counters should be wide enough that they can't
     * overflow during a single bgwriter cycle.
     */
    uint32      completePasses;     /* Complete cycles of the clock sweep */
    pg_atomic_uint32 numBufferAllocs;   /* Buffers allocated since last reset */

    /*
     * Bgworker process to be notified upon activity or -1 if none. See
     * StrategyNotifyBgWriter.
     */
    int         bgwprocno;
} BufferStrategyControl;

/* Pointers to shared state */
static BufferStrategyControl *StrategyControl = NULL;

/*
 * Private (non-shared) state for managing a ring of shared buffers to re-use.
 * This is currently the only kind of BufferAccessStrategy object, but someday
 * we might have more kinds.
 */
typedef struct BufferAccessStrategyData
{
    /* Overall strategy type */
    BufferAccessStrategyType btype;
    /* Number of elements in buffers[] array */
    int         ring_size;

    /*
     * Index of the "current" slot in the ring, ie, the one most recently
     * returned by GetBufferFromRing.
     */
    int         current;

    /*
     * True if the buffer just returned by StrategyGetBuffer had been in the
     * ring already.
     */
    bool        current_was_in_ring;

    /*
     * Array of buffer numbers. InvalidBuffer (that is, zero) indicates we
     * have not yet selected a buffer for this ring slot. For allocation
     * simplicity this is palloc'd together with the fixed fields of the
     * struct.
     */
    Buffer      buffers[FLEXIBLE_ARRAY_MEMBER];
} BufferAccessStrategyData;


/* Prototypes for internal functions */
static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
                                     uint32 *buf_state);
static void AddBufferToRing(BufferAccessStrategy strategy,
                            BufferDesc *buf);

/*
 * ClockSweepTick - Helper routine for StrategyGetBuffer()
 *
 * Move the clock hand one buffer ahead of its current position and return the
 * id of the buffer now under the hand.
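 *
 * As a concrete illustration (using a hypothetically tiny NBuffers of 4):
 * the underlying counter hands out 0, 1, 2, 3, 4, ...; the backend that
 * fetches the raw value 4 returns buffer 4 % 4 = 0 and, having caused the
 * wraparound, compare-and-swaps the counter from 5 back to 1 while bumping
 * completePasses under the spinlock. If the CAS fails because other backends
 * ticked concurrently, the loop below simply retries against the freshly
 * read counter value.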
 */
static inline uint32
ClockSweepTick(void)
{
    uint32      victim;

    /*
     * Atomically move the hand ahead one buffer - if several processes are
     * doing this concurrently, buffers can be returned slightly out of
     * apparent order.
     */
    victim =
        pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);

    if (victim >= NBuffers)
    {
        uint32      originalVictim = victim;

        /* always wrap what we look up in BufferDescriptors */
        victim = victim % NBuffers;

        /*
         * If we're the one that just caused a wraparound, force
         * completePasses to be incremented while holding the spinlock. We
         * need the spinlock so StrategySyncStart() can return a consistent
         * value consisting of nextVictimBuffer and completePasses.
         */
        if (victim == 0)
        {
            uint32      expected;
            uint32      wrapped;
            bool        success = false;

            expected = originalVictim + 1;

            while (!success)
            {
                /*
                 * Acquire the spinlock while increasing completePasses. That
                 * allows other readers to read nextVictimBuffer and
                 * completePasses in a consistent manner which is required for
                 * StrategySyncStart(). In theory delaying the increment
                 * could lead to an overflow of nextVictimBuffer, but that's
                 * highly unlikely and wouldn't be particularly harmful.
                 */
                SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

                wrapped = expected % NBuffers;

                success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
                                                         &expected, wrapped);
                if (success)
                    StrategyControl->completePasses++;
                SpinLockRelease(&StrategyControl->buffer_strategy_lock);
            }
        }
    }
    return victim;
}

/*
 * have_free_buffer -- a lockless check to see if there is a free buffer in
 * the buffer pool.
 *
 * A true result can become stale as soon as other operations take buffers
 * off the freelist, so a caller that strictly needs a free buffer should not
 * rely on this check.
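 * (For example, a caller such as pg_prewarm's autoprewarm worker can use it
 * merely as a cheap hint to stop loading blocks once the freelist appears to
 * be empty.)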
 */
bool
have_free_buffer(void)
{
    if (StrategyControl->firstFreeBuffer >= 0)
        return true;
    else
        return false;
}

/*
 * StrategyGetBuffer
 *
 * Called by the bufmgr to get the next candidate buffer to use in
 * BufferAlloc(). The only hard requirement BufferAlloc() has is that
 * the selected buffer must not currently be pinned by anyone.
 *
 * strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 * To ensure that no one else can pin the buffer before we do, we must
 * return the buffer with the buffer header spinlock still held.
 */
BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state)
{
    BufferDesc *buf;
    int         bgwprocno;
    int         trycounter;
    uint32      local_buf_state;    /* to avoid repeated (de-)referencing */

    /*
     * If given a strategy object, see whether it can select a buffer. We
     * assume strategy objects don't need buffer_strategy_lock.
     */
    if (strategy != NULL)
    {
        buf = GetBufferFromRing(strategy, buf_state);
        if (buf != NULL)
            return buf;
    }

    /*
     * If asked, we need to waken the bgwriter. Since we don't want to rely on
     * a spinlock for this we force a read from shared memory once, and then
     * set the latch based on that value. We need to go to this length because
     * otherwise bgwprocno might be reset while/after we check, since the
     * compiler might just reread the value from memory.
     *
     * This can possibly set the latch of the wrong process if the bgwriter
     * dies in the wrong moment. But since PGPROC->procLatch is never
     * deallocated the worst consequence of that is that we set the latch of
     * some arbitrary process.
     */
    bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
    if (bgwprocno != -1)
    {
        /* reset bgwprocno first, before setting the latch */
        StrategyControl->bgwprocno = -1;

        /*
         * Not acquiring ProcArrayLock here which is slightly icky. It's
         * actually fine because procLatch isn't ever freed, so we just can
         * potentially set the wrong process' (or no process') latch.
         */
        SetLatch(&ProcGlobal->allProcs[bgwprocno].procLatch);
    }

    /*
     * We count buffer allocation requests so that the bgwriter can estimate
     * the rate of buffer consumption. Note that buffers recycled by a
     * strategy object are intentionally not counted here.
     */
    pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);

    /*
     * First check, without acquiring the lock, whether there are buffers in
     * the freelist. Since we otherwise don't require the spinlock in every
     * StrategyGetBuffer() invocation, it'd be sad to acquire it here -
     * uselessly in most cases. That obviously leaves a race where a buffer is
     * put on the freelist but we don't see the store yet - but that's pretty
     * harmless; it'll just get used during the next buffer acquisition.
     *
     * If there are buffers on the freelist, acquire the spinlock to pop one
     * buffer off the freelist. Then check whether that buffer is usable and
     * repeat if not.
     *
     * Note that the freeNext fields are considered to be protected by the
     * buffer_strategy_lock, not the individual buffer spinlocks, so it's OK
     * to manipulate them without holding the buffer spinlock.
     */
    if (StrategyControl->firstFreeBuffer >= 0)
    {
        while (true)
        {
            /* Acquire the spinlock to remove element from the freelist */
            SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

            if (StrategyControl->firstFreeBuffer < 0)
            {
                SpinLockRelease(&StrategyControl->buffer_strategy_lock);
                break;
            }

            buf = GetBufferDescriptor(StrategyControl->firstFreeBuffer);
            Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

            /* Unconditionally remove buffer from freelist */
            StrategyControl->firstFreeBuffer = buf->freeNext;
            buf->freeNext = FREENEXT_NOT_IN_LIST;

            /*
             * Release the lock so someone else can access the freelist while
             * we check out this buffer.
             */
            SpinLockRelease(&StrategyControl->buffer_strategy_lock);

            /*
             * If the buffer is pinned or has a nonzero usage_count, we cannot
             * use it; discard it and retry. (This can only happen if VACUUM
             * put a valid buffer in the freelist and then someone else used
             * it before we got to it. It's probably impossible altogether as
             * of 8.3, but we'd better check anyway.)
             */
            local_buf_state = LockBufHdr(buf);
            if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
                && BUF_STATE_GET_USAGECOUNT(local_buf_state) == 0)
            {
                if (strategy != NULL)
                    AddBufferToRing(strategy, buf);
                *buf_state = local_buf_state;
                return buf;
            }
            UnlockBufHdr(buf, local_buf_state);
        }
    }

    /* Nothing on the freelist, so run the "clock sweep" algorithm */
    trycounter = NBuffers;
    for (;;)
    {
        buf = GetBufferDescriptor(ClockSweepTick());

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; decrement the usage_count (unless pinned) and keep scanning.
         */
        local_buf_state = LockBufHdr(buf);

        if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0)
        {
            if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
            {
                local_buf_state -= BUF_USAGECOUNT_ONE;

                trycounter = NBuffers;
            }
            else
            {
                /* Found a usable buffer */
                if (strategy != NULL)
                    AddBufferToRing(strategy, buf);
                *buf_state = local_buf_state;
                return buf;
            }
        }
        else if (--trycounter == 0)
        {
            /*
             * We've scanned all the buffers without making any state changes,
             * so all the buffers are pinned (or were when we looked at them).
             * We could hope that someone will free one eventually, but it's
             * probably better to fail than to risk getting stuck in an
             * infinite loop.
             */
            UnlockBufHdr(buf, local_buf_state);
            elog(ERROR, "no unpinned buffers available");
        }
        UnlockBufHdr(buf, local_buf_state);
    }
}

/*
 * StrategyFreeBuffer: put a buffer on the freelist
 */
void
StrategyFreeBuffer(BufferDesc *buf)
{
    SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

    /*
     * It is possible that we are told to put something in the freelist that
     * is already in it; don't screw up the list if so.
     */
    if (buf->freeNext == FREENEXT_NOT_IN_LIST)
    {
        buf->freeNext = StrategyControl->firstFreeBuffer;
        if (buf->freeNext < 0)
            StrategyControl->lastFreeBuffer = buf->buf_id;
        StrategyControl->firstFreeBuffer = buf->buf_id;
    }

    SpinLockRelease(&StrategyControl->buffer_strategy_lock);
}

/*
 * StrategySyncStart -- tell BufferSync where to start syncing
 *
 * The result is the buffer index of the best buffer to sync first.
 * BufferSync() will proceed circularly around the buffer array from there.
 *
 * In addition, we return the completed-pass count (which is effectively
 * the higher-order bits of nextVictimBuffer) and the count of recent buffer
 * allocs if non-NULL pointers are passed. The alloc count is reset after
 * being read.
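 *
 * A caller can view the pair as a single, monotonically advancing clock
 * position, roughly complete_passes * NBuffers + result; differencing two
 * such positions gives the number of buffers the clock hand moved in
 * between, which is essentially how the bgwriter paces its cleaning scan.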
393 | */ |
394 | int |
395 | StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc) |
396 | { |
397 | uint32 nextVictimBuffer; |
398 | int result; |
399 | |
400 | SpinLockAcquire(&StrategyControl->buffer_strategy_lock); |
401 | nextVictimBuffer = pg_atomic_read_u32(&StrategyControl->nextVictimBuffer); |
402 | result = nextVictimBuffer % NBuffers; |
403 | |
404 | if (complete_passes) |
405 | { |
406 | *complete_passes = StrategyControl->completePasses; |
407 | |
408 | /* |
409 | * Additionally add the number of wraparounds that happened before |
410 | * completePasses could be incremented. C.f. ClockSweepTick(). |
411 | */ |
412 | *complete_passes += nextVictimBuffer / NBuffers; |
413 | } |
414 | |
415 | if (num_buf_alloc) |
416 | { |
417 | *num_buf_alloc = pg_atomic_exchange_u32(&StrategyControl->numBufferAllocs, 0); |
418 | } |
419 | SpinLockRelease(&StrategyControl->buffer_strategy_lock); |
420 | return result; |
421 | } |
422 | |
423 | /* |
424 | * StrategyNotifyBgWriter -- set or clear allocation notification latch |
425 | * |
426 | * If bgwprocno isn't -1, the next invocation of StrategyGetBuffer will |
427 | * set that latch. Pass -1 to clear the pending notification before it |
428 | * happens. This feature is used by the bgwriter process to wake itself up |
429 | * from hibernation, and is not meant for anybody else to use. |
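 * (The bgwriter is expected to pass its own pgprocno here, i.e.
 * StrategyNotifyBgWriter(MyProc->pgprocno), just before it hibernates.)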
 */
void
StrategyNotifyBgWriter(int bgwprocno)
{
    /*
     * We acquire buffer_strategy_lock just to ensure that the store appears
     * atomic to StrategyGetBuffer. The bgwriter should call this rather
     * infrequently, so there's no performance penalty from being safe.
     */
    SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
    StrategyControl->bgwprocno = bgwprocno;
    SpinLockRelease(&StrategyControl->buffer_strategy_lock);
}


/*
 * StrategyShmemSize
 *
 * estimate the size of shared memory used by the freelist-related structures.
 *
 * Note: for somewhat historical reasons, the buffer lookup hashtable size
 * is also determined here.
 */
Size
StrategyShmemSize(void)
{
    Size        size = 0;

    /* size of lookup hash table ... see comment in StrategyInitialize */
    size = add_size(size, BufTableShmemSize(NBuffers + NUM_BUFFER_PARTITIONS));

    /* size of the shared replacement strategy control block */
    size = add_size(size, MAXALIGN(sizeof(BufferStrategyControl)));

    return size;
}

/*
 * StrategyInitialize -- initialize the buffer cache replacement
 * strategy.
 *
 * Assumes: All of the buffers are already built into a linked list.
 * Only called by postmaster and only during initialization.
 */
void
StrategyInitialize(bool init)
{
    bool        found;

    /*
     * Initialize the shared buffer lookup hashtable.
     *
     * Since we can't tolerate running out of lookup table entries, we must be
     * sure to specify an adequate table size here. The maximum steady-state
     * usage is of course NBuffers entries, but BufferAlloc() tries to insert
     * a new entry before deleting the old. In principle this could be
     * happening in each partition concurrently, so we could need as many as
     * NBuffers + NUM_BUFFER_PARTITIONS entries.
     */
    InitBufTable(NBuffers + NUM_BUFFER_PARTITIONS);

    /*
     * Get or create the shared strategy control block
     */
    StrategyControl = (BufferStrategyControl *)
        ShmemInitStruct("Buffer Strategy Status",
                        sizeof(BufferStrategyControl),
                        &found);

    if (!found)
    {
        /*
         * Only done once, usually in postmaster
         */
        Assert(init);

        SpinLockInit(&StrategyControl->buffer_strategy_lock);

        /*
         * Grab the whole linked list of free buffers for our strategy. We
         * assume it was previously set up by InitBufferPool().
         */
        StrategyControl->firstFreeBuffer = 0;
        StrategyControl->lastFreeBuffer = NBuffers - 1;

        /* Initialize the clock sweep pointer */
        pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0);

        /* Clear statistics */
        StrategyControl->completePasses = 0;
        pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);

        /* No pending notification */
        StrategyControl->bgwprocno = -1;
    }
    else
        Assert(!init);
}


/* ----------------------------------------------------------------
 *				Backend-private buffer ring management
 * ----------------------------------------------------------------
 */


/*
 * GetAccessStrategy -- create a BufferAccessStrategy object
 *
 * The object is allocated in the current memory context.
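 *
 * Typical usage is sketched below; "rel", "blkno", and "buf" are placeholders
 * for whatever relation and block the caller is actually reading:
 *
 *		BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
 *
 *		buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
 *								 bstrategy);
 *		...
 *		FreeAccessStrategy(bstrategy);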
 */
BufferAccessStrategy
GetAccessStrategy(BufferAccessStrategyType btype)
{
    BufferAccessStrategy strategy;
    int         ring_size;

    /*
     * Select ring size to use. See buffer/README for rationales.
     *
     * Note: if you change the ring size for BAS_BULKREAD, see also
     * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
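     *
     * With the default BLCKSZ of 8192, for example, the arithmetic below
     * gives BAS_BULKREAD and BAS_VACUUM 32-buffer rings and BAS_BULKWRITE a
     * 2048-buffer ring, before the NBuffers/8 clamp is applied.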
     */
    switch (btype)
    {
        case BAS_NORMAL:
            /* if someone asks for NORMAL, just give 'em a "default" object */
            return NULL;

        case BAS_BULKREAD:
            ring_size = 256 * 1024 / BLCKSZ;
            break;
        case BAS_BULKWRITE:
            ring_size = 16 * 1024 * 1024 / BLCKSZ;
            break;
        case BAS_VACUUM:
            ring_size = 256 * 1024 / BLCKSZ;
            break;

        default:
            elog(ERROR, "unrecognized buffer access strategy: %d",
                 (int) btype);
            return NULL;        /* keep compiler quiet */
    }

    /* Make sure ring isn't an undue fraction of shared buffers */
    ring_size = Min(NBuffers / 8, ring_size);

    /* Allocate the object and initialize all elements to zeroes */
    strategy = (BufferAccessStrategy)
        palloc0(offsetof(BufferAccessStrategyData, buffers) +
                ring_size * sizeof(Buffer));

    /* Set fields that don't start out zero */
    strategy->btype = btype;
    strategy->ring_size = ring_size;

    return strategy;
}

/*
 * FreeAccessStrategy -- release a BufferAccessStrategy object
 *
 * A simple pfree would do at the moment, but we would prefer that callers
 * don't assume that much about the representation of BufferAccessStrategy.
 */
void
FreeAccessStrategy(BufferAccessStrategy strategy)
{
    /* don't crash if called on a "default" strategy */
    if (strategy != NULL)
        pfree(strategy);
}

/*
 * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
 * ring is empty.
 *
 * The bufhdr spin lock is held on the returned buffer.
 */
static BufferDesc *
GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state)
{
    BufferDesc *buf;
    Buffer      bufnum;
    uint32      local_buf_state;    /* to avoid repeated (de-)referencing */


    /* Advance to next ring slot */
    if (++strategy->current >= strategy->ring_size)
        strategy->current = 0;

    /*
     * If the slot hasn't been filled yet, tell the caller to allocate a new
     * buffer with the normal allocation strategy. He will then fill this
     * slot by calling AddBufferToRing with the new buffer.
     */
    bufnum = strategy->buffers[strategy->current];
    if (bufnum == InvalidBuffer)
    {
        strategy->current_was_in_ring = false;
        return NULL;
    }

    /*
     * If the buffer is pinned we cannot use it under any circumstances.
     *
     * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
     * since our own previous usage of the ring element would have left it
     * there, but it might've been decremented by clock sweep since then). A
     * higher usage_count indicates someone else has touched the buffer, so we
     * shouldn't re-use it.
     */
    buf = GetBufferDescriptor(bufnum - 1);
    local_buf_state = LockBufHdr(buf);
    if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
        && BUF_STATE_GET_USAGECOUNT(local_buf_state) <= 1)
    {
        strategy->current_was_in_ring = true;
        *buf_state = local_buf_state;
        return buf;
    }
    UnlockBufHdr(buf, local_buf_state);

    /*
     * Tell caller to allocate a new buffer with the normal allocation
     * strategy. He'll then replace this ring element via AddBufferToRing.
     */
    strategy->current_was_in_ring = false;
    return NULL;
}

/*
 * AddBufferToRing -- add a buffer to the buffer ring
 *
 * Caller must hold the buffer header spinlock on the buffer. Since this
 * is called with the spinlock held, it had better be quite cheap.
 */
static void
AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf)
{
    strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
}

/*
 * StrategyRejectBuffer -- consider rejecting a dirty buffer
 *
 * When a nondefault strategy is used, the buffer manager calls this function
 * when it turns out that the buffer selected by StrategyGetBuffer needs to
 * be written out and doing so would require flushing WAL too. This gives us
 * a chance to choose a different victim.
 *
 * Returns true if buffer manager should ask for a new victim, and false
 * if this buffer should be written and re-used.
 */
bool
StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf)
{
    /* We only do this in bulkread mode */
    if (strategy->btype != BAS_BULKREAD)
        return false;

    /* Don't muck with behavior of normal buffer-replacement strategy */
    if (!strategy->current_was_in_ring ||
        strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
        return false;

    /*
     * Remove the dirty buffer from the ring; necessary to prevent infinite
     * loop if all ring members are dirty.
     */
    strategy->buffers[strategy->current] = InvalidBuffer;

    return true;
}