/*
 * brin_revmap.c
 *		Range map for BRIN indexes
 *
 * The range map (revmap) is a translation structure for BRIN indexes: for each
 * page range there is one summary tuple, and its location is tracked by the
 * revmap. Whenever a new tuple is inserted into a table that violates the
 * previously recorded summary values, a new tuple is inserted into the index
 * and the revmap is updated to point to it.
 *
 * The revmap is stored in the first pages of the index, immediately following
 * the metapage. When the revmap needs to be expanded, all tuples on the
 * regular BRIN page at that block (if any) are moved out of the way.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/brin/brin_revmap.c
 */
#include "postgres.h"

#include "access/brin_page.h"
#include "access/brin_pageops.h"
#include "access/brin_revmap.h"
#include "access/brin_tuple.h"
#include "access/brin_xlog.h"
#include "access/rmgr.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "utils/rel.h"


/*
 * In revmap pages, each item stores an ItemPointerData. These defines let one
 * find the logical revmap page number and index number of the revmap item for
 * the given heap block number.
 */
#define HEAPBLK_TO_REVMAP_BLK(pagesPerRange, heapBlk) \
	((heapBlk / pagesPerRange) / REVMAP_PAGE_MAXITEMS)
#define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk) \
	((heapBlk / pagesPerRange) % REVMAP_PAGE_MAXITEMS)
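
/*
 * A worked example, assuming the default 8 kB block size (for which
 * REVMAP_PAGE_MAXITEMS comes out to about 1360 entries per revmap page) and
 * pagesPerRange = 128: heap block 1000000 belongs to range 1000000 / 128 =
 * 7812, so its revmap item lives on logical revmap page 7812 / 1360 = 5 at
 * index 7812 % 1360 = 1012 within that page. The physical block number
 * additionally skips the metapage; see revmap_get_blkno below.
 */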


struct BrinRevmap
{
	Relation	rm_irel;
	BlockNumber rm_pagesPerRange;
	BlockNumber rm_lastRevmapPage;	/* cached from the metapage */
	Buffer		rm_metaBuf;
	Buffer		rm_currBuf;
};

/* typedef appears in brin_revmap.h */


static BlockNumber revmap_get_blkno(BrinRevmap *revmap,
									BlockNumber heapBlk);
static Buffer revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk);
static BlockNumber revmap_extend_and_get_blkno(BrinRevmap *revmap,
											   BlockNumber heapBlk);
static void revmap_physical_extend(BrinRevmap *revmap);

/*
 * Initialize an access object for a range map. This must be freed by
 * brinRevmapTerminate when caller is done with it.
 */
BrinRevmap *
brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange,
					 Snapshot snapshot)
{
	BrinRevmap *revmap;
	Buffer		meta;
	BrinMetaPageData *metadata;
	Page		page;

	meta = ReadBuffer(idxrel, BRIN_METAPAGE_BLKNO);
	LockBuffer(meta, BUFFER_LOCK_SHARE);
	page = BufferGetPage(meta);
	TestForOldSnapshot(snapshot, idxrel, page);
	metadata = (BrinMetaPageData *) PageGetContents(page);

	revmap = palloc(sizeof(BrinRevmap));
	revmap->rm_irel = idxrel;
	revmap->rm_pagesPerRange = metadata->pagesPerRange;
	revmap->rm_lastRevmapPage = metadata->lastRevmapPage;
	revmap->rm_metaBuf = meta;
	revmap->rm_currBuf = InvalidBuffer;

	*pagesPerRange = metadata->pagesPerRange;

	LockBuffer(meta, BUFFER_LOCK_UNLOCK);

	return revmap;
}

/*
 * Release resources associated with a revmap access object.
 */
void
brinRevmapTerminate(BrinRevmap *revmap)
{
	ReleaseBuffer(revmap->rm_metaBuf);
	if (revmap->rm_currBuf != InvalidBuffer)
		ReleaseBuffer(revmap->rm_currBuf);
	pfree(revmap);
}

/*
 * Extend the revmap to cover the given heap block number.
 */
void
brinRevmapExtend(BrinRevmap *revmap, BlockNumber heapBlk)
{
	BlockNumber mapBlk PG_USED_FOR_ASSERTS_ONLY;

	mapBlk = revmap_extend_and_get_blkno(revmap, heapBlk);

	/* Ensure the block number we got is in the expected range */
	Assert(mapBlk != InvalidBlockNumber &&
		   mapBlk != BRIN_METAPAGE_BLKNO &&
		   mapBlk <= revmap->rm_lastRevmapPage);
}

/*
 * Prepare to insert an entry into the revmap; the revmap buffer in which the
 * entry is to reside is locked and returned. Most callers should call
 * brinRevmapExtend beforehand, as this routine does not extend the revmap if
 * it's not long enough.
 *
 * The returned buffer is also recorded in the revmap struct; finishing that
 * releases the buffer, therefore the caller needn't do it explicitly.
 */
Buffer
brinLockRevmapPageForUpdate(BrinRevmap *revmap, BlockNumber heapBlk)
{
	Buffer		rmBuf;

	rmBuf = revmap_get_buffer(revmap, heapBlk);
	LockBuffer(rmBuf, BUFFER_LOCK_EXCLUSIVE);

	return rmBuf;
}

/*
 * In the given revmap buffer (locked appropriately by caller), which is used
 * in a BRIN index of pagesPerRange pages per range, set the element
 * corresponding to heap block number heapBlk to the given TID.
 *
 * Once the operation is complete, the caller must update the LSN on the
 * passed buffer.
 *
 * This is used both in regular operation and during WAL replay.
 */
void
brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange,
						BlockNumber heapBlk, ItemPointerData tid)
{
	RevmapContents *contents;
	ItemPointerData *iptr;
	Page		page;

	/* The correct page should already be pinned and locked */
	page = BufferGetPage(buf);
	contents = (RevmapContents *) PageGetContents(page);
	iptr = (ItemPointerData *) contents->rm_tids;
	iptr += HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk);

	if (ItemPointerIsValid(&tid))
		ItemPointerSet(iptr,
					   ItemPointerGetBlockNumber(&tid),
					   ItemPointerGetOffsetNumber(&tid));
	else
		ItemPointerSetInvalid(iptr);
}
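
/*
 * A rough sketch of how an insert-path caller might use the two routines
 * above (WAL logging and error handling elided; "tid" is assumed to point
 * at a summary tuple already placed on a regular BRIN page):
 *
 *		brinRevmapExtend(revmap, heapBlk);
 *		revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
 *		START_CRIT_SECTION();
 *		brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
 *		MarkBufferDirty(revmapbuf);
 *		... XLogInsert, then PageSetLSN on the revmap page ...
 *		END_CRIT_SECTION();
 *		LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
 */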

/*
 * Fetch the BrinTuple for a given heap block.
 *
 * The buffer containing the tuple is locked, and returned in *buf. The
 * returned tuple points to the shared buffer and must not be freed; if caller
 * wants to use it after releasing the buffer lock, it must create its own
 * palloc'ed copy. As an optimization, the caller can pass a pinned buffer
 * *buf on entry, which will avoid a pin-unpin cycle when the next tuple is on
 * the same page as a previous one.
 *
 * If no tuple is found for the given heap range, returns NULL. In that case,
 * *buf might still be updated (and pin must be released by caller), but it's
 * not locked.
 *
 * The output tuple offset within the buffer is returned in *off, and its size
 * is returned in *size.
 */
BrinTuple *
brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
						 Buffer *buf, OffsetNumber *off, Size *size, int mode,
						 Snapshot snapshot)
{
	Relation	idxRel = revmap->rm_irel;
	BlockNumber mapBlk;
	RevmapContents *contents;
	ItemPointerData *iptr;
	BlockNumber blk;
	Page		page;
	ItemId		lp;
	BrinTuple  *tup;
	ItemPointerData previptr;

	/* normalize the heap block number to be the first page in the range */
	heapBlk = (heapBlk / revmap->rm_pagesPerRange) * revmap->rm_pagesPerRange;

	/*
	 * Compute the revmap page number we need. If Invalid is returned (i.e.,
	 * the revmap page hasn't been created yet), the requested page range is
	 * not summarized.
	 */
	mapBlk = revmap_get_blkno(revmap, heapBlk);
	if (mapBlk == InvalidBlockNumber)
	{
		*off = InvalidOffsetNumber;
		return NULL;
	}

	ItemPointerSetInvalid(&previptr);
	for (;;)
	{
		CHECK_FOR_INTERRUPTS();

		if (revmap->rm_currBuf == InvalidBuffer ||
			BufferGetBlockNumber(revmap->rm_currBuf) != mapBlk)
		{
			if (revmap->rm_currBuf != InvalidBuffer)
				ReleaseBuffer(revmap->rm_currBuf);

			Assert(mapBlk != InvalidBlockNumber);
			revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk);
		}

		LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_SHARE);

		contents = (RevmapContents *)
			PageGetContents(BufferGetPage(revmap->rm_currBuf));
		iptr = contents->rm_tids;
		iptr += HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);

		if (!ItemPointerIsValid(iptr))
		{
			LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_UNLOCK);
			return NULL;
		}

		/*
		 * Check the TID we got in a previous iteration, if any, and save the
		 * current TID we got from the revmap; if we loop, we can sanity-check
		 * that the next one we get is different. Otherwise we might be stuck
		 * looping forever if the revmap is somehow badly broken.
		 */
		if (ItemPointerIsValid(&previptr) && ItemPointerEquals(&previptr, iptr))
			ereport(ERROR,
					(errcode(ERRCODE_INDEX_CORRUPTED),
					 errmsg_internal("corrupted BRIN index: inconsistent range map")));
		previptr = *iptr;

		blk = ItemPointerGetBlockNumber(iptr);
		*off = ItemPointerGetOffsetNumber(iptr);

		LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_UNLOCK);

		/* Ok, got a pointer to where the BrinTuple should be. Fetch it. */
		if (!BufferIsValid(*buf) || BufferGetBlockNumber(*buf) != blk)
		{
			if (BufferIsValid(*buf))
				ReleaseBuffer(*buf);
			*buf = ReadBuffer(idxRel, blk);
		}
		LockBuffer(*buf, mode);
		page = BufferGetPage(*buf);
		TestForOldSnapshot(snapshot, idxRel, page);

		/* If we land on a revmap page, start over */
		if (BRIN_IS_REGULAR_PAGE(page))
		{
			if (*off > PageGetMaxOffsetNumber(page))
				ereport(ERROR,
						(errcode(ERRCODE_INDEX_CORRUPTED),
						 errmsg_internal("corrupted BRIN index: inconsistent range map")));
			lp = PageGetItemId(page, *off);
			if (ItemIdIsUsed(lp))
			{
				tup = (BrinTuple *) PageGetItem(page, lp);

				if (tup->bt_blkno == heapBlk)
				{
					if (size)
						*size = ItemIdGetLength(lp);
					/* found it! */
					return tup;
				}
			}
		}

		/*
		 * No luck. Assume that the revmap was updated concurrently.
		 */
		LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
	}
	/* not reached, but keep compiler quiet */
	return NULL;
}
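
/*
 * A minimal read-side sketch, assuming the caller wants a private copy of
 * the summary tuple ("idxrel", "heapBlk" and "snapshot" are placeholders
 * supplied by the caller):
 *
 *		BrinRevmap *revmap;
 *		BlockNumber pagesPerRange;
 *		Buffer		buf = InvalidBuffer;
 *		OffsetNumber off;
 *		Size		size;
 *		BrinTuple  *tup;
 *
 *		revmap = brinRevmapInitialize(idxrel, &pagesPerRange, snapshot);
 *		tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, &size,
 *									   BUFFER_LOCK_SHARE, snapshot);
 *		if (tup != NULL)
 *		{
 *			tup = memcpy(palloc(size), tup, size);
 *			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 *		}
 *		if (BufferIsValid(buf))
 *			ReleaseBuffer(buf);
 *		brinRevmapTerminate(revmap);
 */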

/*
 * Delete an index tuple, marking a page range as unsummarized.
 *
 * Index must be locked in ShareUpdateExclusiveLock mode.
 *
 * Return false if caller should retry.
 */
bool
brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
{
	BrinRevmap *revmap;
	BlockNumber pagesPerRange;
	RevmapContents *contents;
	ItemPointerData *iptr;
	ItemPointerData invalidIptr;
	BlockNumber revmapBlk;
	Buffer		revmapBuf;
	Buffer		regBuf;
	Page		revmapPg;
	Page		regPg;
	OffsetNumber revmapOffset;
	OffsetNumber regOffset;
	ItemId		lp;
	BrinTuple  *tup;

	revmap = brinRevmapInitialize(idxrel, &pagesPerRange, NULL);

	revmapBlk = revmap_get_blkno(revmap, heapBlk);
	if (!BlockNumberIsValid(revmapBlk))
	{
		/* revmap page doesn't exist: range not summarized, we're done */
		brinRevmapTerminate(revmap);
		return true;
	}

	/* Lock the revmap page, obtain the index tuple pointer from it */
	revmapBuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
	revmapPg = BufferGetPage(revmapBuf);
	revmapOffset = HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);

	contents = (RevmapContents *) PageGetContents(revmapPg);
	iptr = contents->rm_tids;
	iptr += revmapOffset;

	if (!ItemPointerIsValid(iptr))
	{
		/* no index tuple: range not summarized, we're done */
		LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
		brinRevmapTerminate(revmap);
		return true;
	}

	regBuf = ReadBuffer(idxrel, ItemPointerGetBlockNumber(iptr));
	LockBuffer(regBuf, BUFFER_LOCK_EXCLUSIVE);
	regPg = BufferGetPage(regBuf);

	/* if this is no longer a regular page, tell caller to start over */
	if (!BRIN_IS_REGULAR_PAGE(regPg))
	{
		LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
		LockBuffer(regBuf, BUFFER_LOCK_UNLOCK);
		brinRevmapTerminate(revmap);
		return false;
	}

	regOffset = ItemPointerGetOffsetNumber(iptr);
	if (regOffset > PageGetMaxOffsetNumber(regPg))
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("corrupted BRIN index: inconsistent range map")));

	lp = PageGetItemId(regPg, regOffset);
	if (!ItemIdIsUsed(lp))
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("corrupted BRIN index: inconsistent range map")));
	tup = (BrinTuple *) PageGetItem(regPg, lp);
	/* XXX apply sanity checks? Might as well delete a bogus tuple ... */

	/*
	 * We're only removing data, not reading it, so there's no need to
	 * TestForOldSnapshot here.
	 */

	/*
	 * Because of SUE lock, this function shouldn't run concurrently with
	 * summarization. Placeholder tuples can only exist as leftovers from
	 * crashed summarization, so if we detect any, we complain but proceed.
	 */
	if (BrinTupleIsPlaceholder(tup))
		ereport(WARNING,
				(errmsg("leftover placeholder tuple detected in BRIN index \"%s\", deleting",
						RelationGetRelationName(idxrel))));

	START_CRIT_SECTION();

	ItemPointerSetInvalid(&invalidIptr);
	brinSetHeapBlockItemptr(revmapBuf, revmap->rm_pagesPerRange, heapBlk,
							invalidIptr);
	PageIndexTupleDeleteNoCompact(regPg, regOffset);
	/* XXX record free space in FSM? */

	MarkBufferDirty(regBuf);
	MarkBufferDirty(revmapBuf);

	if (RelationNeedsWAL(idxrel))
	{
		xl_brin_desummarize xlrec;
		XLogRecPtr	recptr;

		xlrec.pagesPerRange = revmap->rm_pagesPerRange;
		xlrec.heapBlk = heapBlk;
		xlrec.regOffset = regOffset;

		XLogBeginInsert();
		XLogRegisterData((char *) &xlrec, SizeOfBrinDesummarize);
		XLogRegisterBuffer(0, revmapBuf, 0);
		XLogRegisterBuffer(1, regBuf, REGBUF_STANDARD);
		recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_DESUMMARIZE);
		PageSetLSN(revmapPg, recptr);
		PageSetLSN(regPg, recptr);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(regBuf);
	LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
	brinRevmapTerminate(revmap);

	return true;
}
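
/*
 * A minimal sketch of the expected calling pattern, assuming the index is
 * already locked in ShareUpdateExclusive mode:
 *
 *		while (!brinRevmapDesummarizeRange(idxrel, heapBlk))
 *			CHECK_FOR_INTERRUPTS();
 */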

/*
 * Given a heap block number, find the corresponding physical revmap block
 * number and return it. If the revmap page hasn't been allocated yet, return
 * InvalidBlockNumber.
 */
static BlockNumber
revmap_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk)
{
	BlockNumber targetblk;

	/* obtain revmap block number, skip 1 for metapage block */
	targetblk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;

	/* Normal case: the revmap page is already allocated */
	if (targetblk <= revmap->rm_lastRevmapPage)
		return targetblk;

	return InvalidBlockNumber;
}

/*
 * Obtain and return a buffer containing the revmap page for the given heap
 * page. The revmap must have been previously extended to cover that page.
 * The returned buffer is also recorded in the revmap struct; finishing that
 * releases the buffer, therefore the caller needn't do it explicitly.
 */
static Buffer
revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk)
{
	BlockNumber mapBlk;

	/* Translate the heap block number to physical index location. */
	mapBlk = revmap_get_blkno(revmap, heapBlk);

	if (mapBlk == InvalidBlockNumber)
		elog(ERROR, "revmap does not cover heap block %u", heapBlk);

	/* Ensure the block number we got is in the expected range */
	Assert(mapBlk != BRIN_METAPAGE_BLKNO &&
		   mapBlk <= revmap->rm_lastRevmapPage);

	/*
	 * Obtain the buffer from which we need to read. If we already have the
	 * correct buffer in our access struct, use that; otherwise, release that
	 * one (if valid) and read the one we need.
	 */
	if (revmap->rm_currBuf == InvalidBuffer ||
		mapBlk != BufferGetBlockNumber(revmap->rm_currBuf))
	{
		if (revmap->rm_currBuf != InvalidBuffer)
			ReleaseBuffer(revmap->rm_currBuf);

		revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk);
	}

	return revmap->rm_currBuf;
}

/*
 * Given a heap block number, find the corresponding physical revmap block
 * number and return it. If the revmap page hasn't been allocated yet, extend
 * the revmap until it is.
 */
static BlockNumber
revmap_extend_and_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk)
{
	BlockNumber targetblk;

	/* obtain revmap block number, skip 1 for metapage block */
	targetblk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;

	/* Extend the revmap, if necessary */
	while (targetblk > revmap->rm_lastRevmapPage)
	{
		CHECK_FOR_INTERRUPTS();
		revmap_physical_extend(revmap);
	}

	return targetblk;
}

/*
 * Try to extend the revmap by one page. This might not happen for a number of
 * reasons; caller is expected to retry until the expected outcome is obtained.
 */
static void
revmap_physical_extend(BrinRevmap *revmap)
{
	Buffer		buf;
	Page		page;
	Page		metapage;
	BrinMetaPageData *metadata;
	BlockNumber mapBlk;
	BlockNumber nblocks;
	Relation	irel = revmap->rm_irel;
	bool		needLock = !RELATION_IS_LOCAL(irel);

	/*
	 * Lock the metapage. This locks out concurrent extensions of the revmap,
	 * but note that we still need to grab the relation extension lock because
	 * another backend can extend the index with regular BRIN pages.
	 */
	LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_EXCLUSIVE);
	metapage = BufferGetPage(revmap->rm_metaBuf);
	metadata = (BrinMetaPageData *) PageGetContents(metapage);

	/*
	 * Check that our cached lastRevmapPage value was up-to-date; if it
	 * wasn't, update the cached copy and have caller start over.
	 */
	if (metadata->lastRevmapPage != revmap->rm_lastRevmapPage)
	{
		revmap->rm_lastRevmapPage = metadata->lastRevmapPage;
		LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
		return;
	}
	mapBlk = metadata->lastRevmapPage + 1;

	nblocks = RelationGetNumberOfBlocks(irel);
	if (mapBlk < nblocks)
	{
		buf = ReadBuffer(irel, mapBlk);
		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
		page = BufferGetPage(buf);
	}
	else
	{
		if (needLock)
			LockRelationForExtension(irel, ExclusiveLock);

		buf = ReadBuffer(irel, P_NEW);
		if (BufferGetBlockNumber(buf) != mapBlk)
		{
			/*
			 * Very rare corner case: somebody extended the relation
			 * concurrently after we read its length. If this happens, give
			 * up and have caller start over. We will have to evacuate that
			 * page from under whoever is using it.
			 */
			if (needLock)
				UnlockRelationForExtension(irel, ExclusiveLock);
			LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
			ReleaseBuffer(buf);
			return;
		}
		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
		page = BufferGetPage(buf);

		if (needLock)
			UnlockRelationForExtension(irel, ExclusiveLock);
	}

	/* Check that it's a regular block (or an empty page) */
	if (!PageIsNew(page) && !BRIN_IS_REGULAR_PAGE(page))
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("unexpected page type 0x%04X in BRIN index \"%s\" block %u",
						BrinPageType(page),
						RelationGetRelationName(irel),
						BufferGetBlockNumber(buf))));

	/* If the page is in use, evacuate it and restart */
	if (brin_start_evacuating_page(irel, buf))
	{
		LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
		brin_evacuate_page(irel, revmap->rm_pagesPerRange, revmap, buf);

		/* have caller start over */
		return;
	}

	/*
	 * Ok, we have now locked the metapage and the target block. Re-initialize
	 * the target block as a revmap page, and update the metapage.
	 */
	START_CRIT_SECTION();

	/* the rm_tids array is initialized to all invalid by PageInit */
	brin_page_init(page, BRIN_PAGETYPE_REVMAP);
	MarkBufferDirty(buf);

	metadata->lastRevmapPage = mapBlk;

	/*
	 * Set pd_lower just past the end of the metadata. This is essential,
	 * because without doing so, metadata will be lost if xlog.c compresses
	 * the page. (We must do this here because pre-v11 versions of PG did not
	 * set the metapage's pd_lower correctly, so a pg_upgraded index might
	 * contain the wrong value.)
	 */
	((PageHeader) metapage)->pd_lower =
		((char *) metadata + sizeof(BrinMetaPageData)) - (char *) metapage;

	MarkBufferDirty(revmap->rm_metaBuf);

	if (RelationNeedsWAL(revmap->rm_irel))
	{
		xl_brin_revmap_extend xlrec;
		XLogRecPtr	recptr;

		xlrec.targetBlk = mapBlk;

		XLogBeginInsert();
		XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend);
		XLogRegisterBuffer(0, revmap->rm_metaBuf, REGBUF_STANDARD);

		XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT);

		recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND);
		PageSetLSN(metapage, recptr);
		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();

	LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);

	UnlockReleaseBuffer(buf);
}
664 | |