/*
 * brin_revmap.c
 *		Range map for BRIN indexes
 *
 * The range map (revmap) is a translation structure for BRIN indexes: for each
 * page range there is one summary tuple, and its location is tracked by the
 * revmap.  Whenever a newly inserted heap tuple violates the previously
 * recorded summary values for its range, a new summary tuple is inserted into
 * the index and the revmap is updated to point to it.
 *
 * The revmap is stored in the first pages of the index, immediately following
 * the metapage.  When the revmap needs to be expanded, all tuples on the
 * regular BRIN page at that block (if any) are moved out of the way.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/brin/brin_revmap.c
 */
#include "postgres.h"

#include "access/brin_page.h"
#include "access/brin_pageops.h"
#include "access/brin_revmap.h"
#include "access/brin_tuple.h"
#include "access/brin_xlog.h"
#include "access/rmgr.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "utils/rel.h"


/*
 * In revmap pages, each item stores an ItemPointerData.  These defines let one
 * find the logical revmap page number and index number of the revmap item for
 * the given heap block number.
 */
#define HEAPBLK_TO_REVMAP_BLK(pagesPerRange, heapBlk) \
    ((heapBlk / pagesPerRange) / REVMAP_PAGE_MAXITEMS)
#define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk) \
    ((heapBlk / pagesPerRange) % REVMAP_PAGE_MAXITEMS)
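
/*
 * Worked example (illustrative only; the exact REVMAP_PAGE_MAXITEMS value
 * depends on the configured block size -- with the default 8kB blocks it
 * works out to 1360 entries per revmap page):
 *
 *     pagesPerRange = 128, heapBlk = 200000
 *     range number  = 200000 / 128 = 1562
 *     revmap page   = 1562 / 1360  = 1    (logical; the physical block adds
 *                                          1 for the metapage, see
 *                                          revmap_get_blkno below)
 *     item index    = 1562 % 1360  = 202
 */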


struct BrinRevmap
{
    Relation rm_irel;               /* the BRIN index relation */
    BlockNumber rm_pagesPerRange;   /* number of heap pages per range */
    BlockNumber rm_lastRevmapPage;  /* cached from the metapage */
    Buffer rm_metaBuf;              /* metapage buffer, pinned for the
                                     * lifetime of this access object */
    Buffer rm_currBuf;              /* most recently used revmap page, or
                                     * InvalidBuffer */
};

/* typedef appears in brin_revmap.h */


static BlockNumber revmap_get_blkno(BrinRevmap *revmap,
                                    BlockNumber heapBlk);
static Buffer revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk);
static BlockNumber revmap_extend_and_get_blkno(BrinRevmap *revmap,
                                               BlockNumber heapBlk);
static void revmap_physical_extend(BrinRevmap *revmap);

/*
 * Initialize an access object for a range map.  This must be freed by
 * brinRevmapTerminate when the caller is done with it.
 */
BrinRevmap *
brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange,
                     Snapshot snapshot)
{
    BrinRevmap *revmap;
    Buffer meta;
    BrinMetaPageData *metadata;
    Page page;

    meta = ReadBuffer(idxrel, BRIN_METAPAGE_BLKNO);
    LockBuffer(meta, BUFFER_LOCK_SHARE);
    page = BufferGetPage(meta);
    TestForOldSnapshot(snapshot, idxrel, page);
    metadata = (BrinMetaPageData *) PageGetContents(page);

    revmap = palloc(sizeof(BrinRevmap));
    revmap->rm_irel = idxrel;
    revmap->rm_pagesPerRange = metadata->pagesPerRange;
    revmap->rm_lastRevmapPage = metadata->lastRevmapPage;
    revmap->rm_metaBuf = meta;
    revmap->rm_currBuf = InvalidBuffer;

    *pagesPerRange = metadata->pagesPerRange;

    LockBuffer(meta, BUFFER_LOCK_UNLOCK);

    return revmap;
}

/*
 * Release resources associated with a revmap access object.
 */
void
brinRevmapTerminate(BrinRevmap *revmap)
{
    ReleaseBuffer(revmap->rm_metaBuf);
    if (revmap->rm_currBuf != InvalidBuffer)
        ReleaseBuffer(revmap->rm_currBuf);
    pfree(revmap);
}
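
/*
 * Typical lifecycle, as a sketch (hypothetical caller; "idxrel" and
 * "snapshot" are assumed to come from the caller's context):
 *
 *     BlockNumber pagesPerRange;
 *     BrinRevmap *revmap;
 *
 *     revmap = brinRevmapInitialize(idxrel, &pagesPerRange, snapshot);
 *     ... look up or update page ranges ...
 *     brinRevmapTerminate(revmap);
 */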

/*
 * Extend the revmap to cover the given heap block number.
 */
void
brinRevmapExtend(BrinRevmap *revmap, BlockNumber heapBlk)
{
    BlockNumber mapBlk PG_USED_FOR_ASSERTS_ONLY;

    mapBlk = revmap_extend_and_get_blkno(revmap, heapBlk);

    /* Ensure the block number we got is in the expected range */
    Assert(mapBlk != InvalidBlockNumber &&
           mapBlk != BRIN_METAPAGE_BLKNO &&
           mapBlk <= revmap->rm_lastRevmapPage);
}

/*
 * Prepare to insert an entry into the revmap; the revmap buffer in which the
 * entry is to reside is locked and returned.  Most callers should call
 * brinRevmapExtend beforehand, as this routine does not extend the revmap if
 * it's not long enough.
 *
 * The returned buffer is also recorded in the revmap struct; terminating the
 * revmap access (brinRevmapTerminate) releases the buffer, so the caller
 * needn't release it explicitly.
 */
Buffer
brinLockRevmapPageForUpdate(BrinRevmap *revmap, BlockNumber heapBlk)
{
    Buffer rmBuf;

    rmBuf = revmap_get_buffer(revmap, heapBlk);
    LockBuffer(rmBuf, BUFFER_LOCK_EXCLUSIVE);

    return rmBuf;
}
| 146 | |
| 147 | /* |
| 148 | * In the given revmap buffer (locked appropriately by caller), which is used |
| 149 | * in a BRIN index of pagesPerRange pages per range, set the element |
| 150 | * corresponding to heap block number heapBlk to the given TID. |
| 151 | * |
| 152 | * Once the operation is complete, the caller must update the LSN on the |
| 153 | * returned buffer. |
| 154 | * |
| 155 | * This is used both in regular operation and during WAL replay. |
| 156 | */ |
| 157 | void |
| 158 | brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange, |
| 159 | BlockNumber heapBlk, ItemPointerData tid) |
| 160 | { |
| 161 | RevmapContents *contents; |
| 162 | ItemPointerData *iptr; |
| 163 | Page page; |
| 164 | |
| 165 | /* The correct page should already be pinned and locked */ |
| 166 | page = BufferGetPage(buf); |
| 167 | contents = (RevmapContents *) PageGetContents(page); |
| 168 | iptr = (ItemPointerData *) contents->rm_tids; |
| 169 | iptr += HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk); |
| 170 | |
| 171 | if (ItemPointerIsValid(&tid)) |
| 172 | ItemPointerSet(iptr, |
| 173 | ItemPointerGetBlockNumber(&tid), |
| 174 | ItemPointerGetOffsetNumber(&tid)); |
| 175 | else |
| 176 | ItemPointerSetInvalid(iptr); |
| 177 | } |
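
/*
 * Illustrative call sequence (sketch only; "revmapbuf", "newtid" and
 * "recptr" are hypothetical names).  Per the comment above, the caller is
 * responsible for WAL-logging the change and setting the page LSN:
 *
 *     brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
 *     MarkBufferDirty(revmapbuf);
 *     ... emit WAL record, obtaining recptr ...
 *     PageSetLSN(BufferGetPage(revmapbuf), recptr);
 */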

/*
 * Fetch the BrinTuple for a given heap block.
 *
 * The buffer containing the tuple is locked, and returned in *buf.  The
 * returned tuple points to the shared buffer and must not be freed; if the
 * caller wants to use it after releasing the buffer lock, it must create its
 * own palloc'ed copy.  As an optimization, the caller can pass a pinned
 * buffer in *buf on entry, which avoids a pin-unpin cycle when the next
 * tuple is on the same page as the previous one.
 *
 * If no tuple is found for the given heap range, returns NULL.  In that case,
 * *buf might still be updated (and the caller must still release its pin),
 * but it's not locked.
 *
 * The output tuple offset within the buffer is returned in *off, and its size
 * is returned in *size.
 */
BrinTuple *
brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
                         Buffer *buf, OffsetNumber *off, Size *size, int mode,
                         Snapshot snapshot)
{
    Relation idxRel = revmap->rm_irel;
    BlockNumber mapBlk;
    RevmapContents *contents;
    ItemPointerData *iptr;
    BlockNumber blk;
    Page page;
    ItemId lp;
    BrinTuple *tup;
    ItemPointerData previptr;

    /* normalize the heap block number to be the first page in the range */
    heapBlk = (heapBlk / revmap->rm_pagesPerRange) * revmap->rm_pagesPerRange;

    /*
     * Compute the revmap page number we need.  If InvalidBlockNumber is
     * returned (i.e., the revmap page hasn't been created yet), the requested
     * page range is not summarized.
     */
    mapBlk = revmap_get_blkno(revmap, heapBlk);
    if (mapBlk == InvalidBlockNumber)
    {
        *off = InvalidOffsetNumber;
        return NULL;
    }

    ItemPointerSetInvalid(&previptr);
    for (;;)
    {
        CHECK_FOR_INTERRUPTS();

        if (revmap->rm_currBuf == InvalidBuffer ||
            BufferGetBlockNumber(revmap->rm_currBuf) != mapBlk)
        {
            if (revmap->rm_currBuf != InvalidBuffer)
                ReleaseBuffer(revmap->rm_currBuf);

            Assert(mapBlk != InvalidBlockNumber);
            revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk);
        }

        LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_SHARE);

        contents = (RevmapContents *)
            PageGetContents(BufferGetPage(revmap->rm_currBuf));
        iptr = contents->rm_tids;
        iptr += HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);

        if (!ItemPointerIsValid(iptr))
        {
            LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_UNLOCK);
            return NULL;
        }

        /*
         * Check the TID we got in a previous iteration, if any, and save the
         * current TID we got from the revmap; if we loop, we can sanity-check
         * that the next one we get is different.  Otherwise we might be stuck
         * looping forever if the revmap is somehow badly broken.
         */
        if (ItemPointerIsValid(&previptr) && ItemPointerEquals(&previptr, iptr))
            ereport(ERROR,
                    (errcode(ERRCODE_INDEX_CORRUPTED),
                     errmsg_internal("corrupted BRIN index: inconsistent range map")));
        previptr = *iptr;

        blk = ItemPointerGetBlockNumber(iptr);
        *off = ItemPointerGetOffsetNumber(iptr);

        LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_UNLOCK);

        /* Ok, got a pointer to where the BrinTuple should be.  Fetch it. */
        if (!BufferIsValid(*buf) || BufferGetBlockNumber(*buf) != blk)
        {
            if (BufferIsValid(*buf))
                ReleaseBuffer(*buf);
            *buf = ReadBuffer(idxRel, blk);
        }
        LockBuffer(*buf, mode);
        page = BufferGetPage(*buf);
        TestForOldSnapshot(snapshot, idxRel, page);

        /* If we land on a revmap page instead, fall through and start over */
        if (BRIN_IS_REGULAR_PAGE(page))
        {
            if (*off > PageGetMaxOffsetNumber(page))
                ereport(ERROR,
                        (errcode(ERRCODE_INDEX_CORRUPTED),
                         errmsg_internal("corrupted BRIN index: inconsistent range map")));
            lp = PageGetItemId(page, *off);
            if (ItemIdIsUsed(lp))
            {
                tup = (BrinTuple *) PageGetItem(page, lp);

                if (tup->bt_blkno == heapBlk)
                {
                    if (size)
                        *size = ItemIdGetLength(lp);
                    /* found it! */
                    return tup;
                }
            }
        }

        /*
         * No luck.  Assume that the revmap was updated concurrently.
         */
        LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
    }
    /* not reached, but keep compiler quiet */
    return NULL;
}
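
/*
 * Usage sketch (hypothetical caller, illustrating the pin-reuse
 * optimization described above; error handling omitted):
 *
 *     Buffer buf = InvalidBuffer;
 *     OffsetNumber off;
 *     Size size;
 *     BrinTuple *tup;
 *
 *     tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, &size,
 *                                    BUFFER_LOCK_SHARE, snapshot);
 *     if (tup != NULL)
 *     {
 *         ... copy the tuple if it's needed after the unlock ...
 *         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 *     }
 *     if (BufferIsValid(buf))
 *         ReleaseBuffer(buf);
 */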

/*
 * Delete an index tuple, marking a page range as unsummarized.
 *
 * Index must be locked in ShareUpdateExclusiveLock mode.
 *
 * Return false if caller should retry.
 */
bool
brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
{
    BrinRevmap *revmap;
    BlockNumber pagesPerRange;
    RevmapContents *contents;
    ItemPointerData *iptr;
    ItemPointerData invalidIptr;
    BlockNumber revmapBlk;
    Buffer revmapBuf;
    Buffer regBuf;
    Page revmapPg;
    Page regPg;
    OffsetNumber revmapOffset;
    OffsetNumber regOffset;
    ItemId lp;
    BrinTuple *tup;

    revmap = brinRevmapInitialize(idxrel, &pagesPerRange, NULL);

    revmapBlk = revmap_get_blkno(revmap, heapBlk);
    if (!BlockNumberIsValid(revmapBlk))
    {
        /* revmap page doesn't exist: range not summarized, we're done */
        brinRevmapTerminate(revmap);
        return true;
    }

    /* Lock the revmap page, obtain the index tuple pointer from it */
    revmapBuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
    revmapPg = BufferGetPage(revmapBuf);
    revmapOffset = HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);

    contents = (RevmapContents *) PageGetContents(revmapPg);
    iptr = contents->rm_tids;
    iptr += revmapOffset;

    if (!ItemPointerIsValid(iptr))
    {
        /* no index tuple: range not summarized, we're done */
        LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
        brinRevmapTerminate(revmap);
        return true;
    }

    regBuf = ReadBuffer(idxrel, ItemPointerGetBlockNumber(iptr));
    LockBuffer(regBuf, BUFFER_LOCK_EXCLUSIVE);
    regPg = BufferGetPage(regBuf);

    /* if this is no longer a regular page, tell caller to start over */
    if (!BRIN_IS_REGULAR_PAGE(regPg))
    {
        LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
        LockBuffer(regBuf, BUFFER_LOCK_UNLOCK);
        brinRevmapTerminate(revmap);
        return false;
    }

    regOffset = ItemPointerGetOffsetNumber(iptr);
    if (regOffset > PageGetMaxOffsetNumber(regPg))
        ereport(ERROR,
                (errcode(ERRCODE_INDEX_CORRUPTED),
                 errmsg("corrupted BRIN index: inconsistent range map")));

    lp = PageGetItemId(regPg, regOffset);
    if (!ItemIdIsUsed(lp))
        ereport(ERROR,
                (errcode(ERRCODE_INDEX_CORRUPTED),
                 errmsg("corrupted BRIN index: inconsistent range map")));
    tup = (BrinTuple *) PageGetItem(regPg, lp);
    /* XXX apply sanity checks?  Might as well delete a bogus tuple ... */

    /*
     * We're only removing data, not reading it, so there's no need to
     * TestForOldSnapshot here.
     */

    /*
     * Because of the ShareUpdateExclusive lock, this function shouldn't run
     * concurrently with summarization.  Placeholder tuples can only exist as
     * leftovers from crashed summarization, so if we detect any, we complain
     * but proceed.
     */
    if (BrinTupleIsPlaceholder(tup))
        ereport(WARNING,
                (errmsg("leftover placeholder tuple detected in BRIN index \"%s\", deleting",
                        RelationGetRelationName(idxrel))));

    START_CRIT_SECTION();

    ItemPointerSetInvalid(&invalidIptr);
    brinSetHeapBlockItemptr(revmapBuf, revmap->rm_pagesPerRange, heapBlk,
                            invalidIptr);
    PageIndexTupleDeleteNoCompact(regPg, regOffset);
    /* XXX record free space in FSM? */

    MarkBufferDirty(regBuf);
    MarkBufferDirty(revmapBuf);

    if (RelationNeedsWAL(idxrel))
    {
        xl_brin_desummarize xlrec;
        XLogRecPtr recptr;

        xlrec.pagesPerRange = revmap->rm_pagesPerRange;
        xlrec.heapBlk = heapBlk;
        xlrec.regOffset = regOffset;

        XLogBeginInsert();
        XLogRegisterData((char *) &xlrec, SizeOfBrinDesummarize);
        XLogRegisterBuffer(0, revmapBuf, 0);
        XLogRegisterBuffer(1, regBuf, REGBUF_STANDARD);
        recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_DESUMMARIZE);
        PageSetLSN(revmapPg, recptr);
        PageSetLSN(regPg, recptr);
    }

    END_CRIT_SECTION();

    UnlockReleaseBuffer(regBuf);
    LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
    brinRevmapTerminate(revmap);

    return true;
}
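
/*
 * Caller-side sketch (hypothetical; mirrors the retry contract above --
 * keep calling until the function reports success):
 *
 *     do
 *     {
 *         CHECK_FOR_INTERRUPTS();
 *     } while (!brinRevmapDesummarizeRange(idxrel, heapBlk));
 */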

/*
 * Given a heap block number, find the corresponding physical revmap block
 * number and return it.  If the revmap page hasn't been allocated yet, return
 * InvalidBlockNumber.
 */
static BlockNumber
revmap_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk)
{
    BlockNumber targetblk;

    /* obtain revmap block number, skip 1 for metapage block */
    targetblk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;

    /* Normal case: the revmap page is already allocated */
    if (targetblk <= revmap->rm_lastRevmapPage)
        return targetblk;

    return InvalidBlockNumber;
}

/*
 * Obtain and return a buffer containing the revmap page for the given heap
 * page.  The revmap must have been previously extended to cover that page.
 * The returned buffer is also recorded in the revmap struct; terminating the
 * revmap access releases the buffer, so the caller needn't do it explicitly.
 */
static Buffer
revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk)
{
    BlockNumber mapBlk;

    /* Translate the heap block number to physical index location. */
    mapBlk = revmap_get_blkno(revmap, heapBlk);

    if (mapBlk == InvalidBlockNumber)
        elog(ERROR, "revmap does not cover heap block %u", heapBlk);

    /* Ensure the block number we got is in the expected range */
    Assert(mapBlk != BRIN_METAPAGE_BLKNO &&
           mapBlk <= revmap->rm_lastRevmapPage);

    /*
     * Obtain the buffer from which we need to read.  If we already have the
     * correct buffer in our access struct, use that; otherwise, release the
     * old one (if valid) and read the one we need.
     */
    if (revmap->rm_currBuf == InvalidBuffer ||
        mapBlk != BufferGetBlockNumber(revmap->rm_currBuf))
    {
        if (revmap->rm_currBuf != InvalidBuffer)
            ReleaseBuffer(revmap->rm_currBuf);

        revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk);
    }

    return revmap->rm_currBuf;
}

/*
 * Given a heap block number, find the corresponding physical revmap block
 * number and return it.  If the revmap page hasn't been allocated yet, extend
 * the revmap until it is.
 */
static BlockNumber
revmap_extend_and_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk)
{
    BlockNumber targetblk;

    /* obtain revmap block number, skip 1 for metapage block */
    targetblk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;

    /* Extend the revmap, if necessary */
    while (targetblk > revmap->rm_lastRevmapPage)
    {
        CHECK_FOR_INTERRUPTS();
        revmap_physical_extend(revmap);
    }

    return targetblk;
}

/*
 * Try to extend the revmap by one page.  This might not happen for a number
 * of reasons; caller is expected to retry until the expected outcome is
 * obtained.
 */
static void
revmap_physical_extend(BrinRevmap *revmap)
{
    Buffer buf;
    Page page;
    Page metapage;
    BrinMetaPageData *metadata;
    BlockNumber mapBlk;
    BlockNumber nblocks;
    Relation irel = revmap->rm_irel;
    bool needLock = !RELATION_IS_LOCAL(irel);

    /*
     * Lock the metapage.  This locks out concurrent extensions of the
     * revmap, but note that we still need to grab the relation extension
     * lock because another backend can extend the index with regular BRIN
     * pages.
     */
    LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_EXCLUSIVE);
    metapage = BufferGetPage(revmap->rm_metaBuf);
    metadata = (BrinMetaPageData *) PageGetContents(metapage);

    /*
     * Check that our cached lastRevmapPage value was up-to-date; if it
     * wasn't, update the cached copy and have caller start over.
     */
    if (metadata->lastRevmapPage != revmap->rm_lastRevmapPage)
    {
        revmap->rm_lastRevmapPage = metadata->lastRevmapPage;
        LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
        return;
    }
    mapBlk = metadata->lastRevmapPage + 1;

    nblocks = RelationGetNumberOfBlocks(irel);
    if (mapBlk < nblocks)
    {
        buf = ReadBuffer(irel, mapBlk);
        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
        page = BufferGetPage(buf);
    }
    else
    {
        if (needLock)
            LockRelationForExtension(irel, ExclusiveLock);

        buf = ReadBuffer(irel, P_NEW);
        if (BufferGetBlockNumber(buf) != mapBlk)
        {
            /*
             * Very rare corner case: somebody extended the relation
             * concurrently after we read its length.  If this happens, give
             * up and have caller start over.  We will have to evacuate that
             * page from under whoever is using it.
             */
            if (needLock)
                UnlockRelationForExtension(irel, ExclusiveLock);
            LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
            ReleaseBuffer(buf);
            return;
        }
        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
        page = BufferGetPage(buf);

        if (needLock)
            UnlockRelationForExtension(irel, ExclusiveLock);
    }

    /* Check that it's a regular page (or an empty one) */
    if (!PageIsNew(page) && !BRIN_IS_REGULAR_PAGE(page))
        ereport(ERROR,
                (errcode(ERRCODE_INDEX_CORRUPTED),
                 errmsg("unexpected page type 0x%04X in BRIN index \"%s\" block %u",
                        BrinPageType(page),
                        RelationGetRelationName(irel),
                        BufferGetBlockNumber(buf))));

    /* If the page is in use, evacuate it and restart */
    if (brin_start_evacuating_page(irel, buf))
    {
        LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
        brin_evacuate_page(irel, revmap->rm_pagesPerRange, revmap, buf);

        /* have caller start over */
        return;
    }

    /*
     * Ok, we have now locked the metapage and the target block.
     * Re-initialize the target block as a revmap page, and update the
     * metapage.
     */
    START_CRIT_SECTION();

    /* the rm_tids array is initialized to all invalid by PageInit */
    brin_page_init(page, BRIN_PAGETYPE_REVMAP);
    MarkBufferDirty(buf);

    metadata->lastRevmapPage = mapBlk;

    /*
     * Set pd_lower just past the end of the metadata.  This is essential,
     * because without doing so, metadata will be lost if xlog.c compresses
     * the page.  (We must do this here because pre-v11 versions of PG did
     * not set the metapage's pd_lower correctly, so a pg_upgraded index
     * might contain the wrong value.)
     */
    ((PageHeader) metapage)->pd_lower =
        ((char *) metadata + sizeof(BrinMetaPageData)) - (char *) metapage;

    MarkBufferDirty(revmap->rm_metaBuf);

    if (RelationNeedsWAL(revmap->rm_irel))
    {
        xl_brin_revmap_extend xlrec;
        XLogRecPtr recptr;

        xlrec.targetBlk = mapBlk;

        XLogBeginInsert();
        XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend);
        XLogRegisterBuffer(0, revmap->rm_metaBuf, REGBUF_STANDARD);

        XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT);

        recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND);
        PageSetLSN(metapage, recptr);
        PageSetLSN(page, recptr);
    }

    END_CRIT_SECTION();

    LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);

    UnlockReleaseBuffer(buf);
}