/*-------------------------------------------------------------------------
 *
 * bufpage.h
 *	  Standard POSTGRES buffer page definitions.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/storage/bufpage.h
 *
 *-------------------------------------------------------------------------
 */
| 14 | #ifndef BUFPAGE_H |
| 15 | #define BUFPAGE_H |
| 16 | |
| 17 | #include "access/xlogdefs.h" |
| 18 | #include "storage/block.h" |
| 19 | #include "storage/item.h" |
| 20 | #include "storage/off.h" |
| 21 | |
| 22 | /* |
| 23 | * A postgres disk page is an abstraction layered on top of a postgres |
| 24 | * disk block (which is simply a unit of i/o, see block.h). |
| 25 | * |
| 26 | * specifically, while a disk block can be unformatted, a postgres |
| 27 | * disk page is always a slotted page of the form: |
| 28 | * |
| 29 | * +----------------+---------------------------------+ |
| 30 | * | PageHeaderData | linp1 linp2 linp3 ... | |
| 31 | * +-----------+----+---------------------------------+ |
| 32 | * | ... linpN | | |
| 33 | * +-----------+--------------------------------------+ |
| 34 | * | ^ pd_lower | |
| 35 | * | | |
| 36 | * | v pd_upper | |
| 37 | * +-------------+------------------------------------+ |
| 38 | * | | tupleN ... | |
| 39 | * +-------------+------------------+-----------------+ |
| 40 | * | ... tuple3 tuple2 tuple1 | "special space" | |
| 41 | * +--------------------------------+-----------------+ |
| 42 | * ^ pd_special |
| 43 | * |
| 44 | * a page is full when nothing can be added between pd_lower and |
| 45 | * pd_upper. |
| 46 | * |
| 47 | * all blocks written out by an access method must be disk pages. |
| 48 | * |
| 49 | * EXCEPTIONS: |
| 50 | * |
| 51 | * obviously, a page is not formatted before it is initialized by |
| 52 | * a call to PageInit. |
| 53 | * |
| 54 | * NOTES: |
| 55 | * |
| 56 | * linp1..N form an ItemId (line pointer) array. ItemPointers point |
| 57 | * to a physical block number and a logical offset (line pointer |
| 58 | * number) within that block/page. Note that OffsetNumbers |
| 59 | * conventionally start at 1, not 0. |
| 60 | * |
| 61 | * tuple1..N are added "backwards" on the page. Since an ItemPointer |
| 62 | * offset is used to access an ItemId entry rather than an actual |
| 63 | * byte-offset position, tuples can be physically shuffled on a page |
| 64 | * whenever the need arises. This indirection also keeps crash recovery |
| 65 | * relatively simple, because the low-level details of page space |
| 66 | * management can be controlled by standard buffer page code during |
| 67 | * logging, and during recovery. |
| 68 | * |
| 69 | * AM-generic per-page information is kept in PageHeaderData. |
| 70 | * |
| 71 | * AM-specific per-page data (if any) is kept in the area marked "special |
| 72 | * space"; each AM has an "opaque" structure defined somewhere that is |
| 73 | * stored as the page trailer. an access method should always |
| 74 | * initialize its pages with PageInit and then set its own opaque |
| 75 | * fields. |
| 76 | */ |
| 77 | |
| 78 | typedef Pointer Page; |
| 79 | |
| 80 | |
| 81 | /* |
| 82 | * location (byte offset) within a page. |
| 83 | * |
| 84 | * note that this is actually limited to 2^15 because we have limited |
| 85 | * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h). |
| 86 | */ |
| 87 | typedef uint16 LocationIndex; |
| 88 | |
| 89 | |
| 90 | /* |
| 91 | * For historical reasons, the 64-bit LSN value is stored as two 32-bit |
| 92 | * values. |
| 93 | */ |
| 94 | typedef struct |
| 95 | { |
| 96 | uint32 xlogid; /* high bits */ |
| 97 | uint32 xrecoff; /* low bits */ |
| 98 | } PageXLogRecPtr; |
| 99 | |
/*
 * PageXLogRecPtrGet
 *		Combine the two 32-bit halves of a PageXLogRecPtr into one 64-bit
 *		value: xlogid supplies the upper 32 bits, xrecoff the lower 32.
 *
 * PageXLogRecPtrSet
 *		Split a 64-bit LSN into the two halves of "ptr".
 *
 * Beware multiple evaluation: "val" and "ptr" are each evaluated twice,
 * and so is "lsn".
 */
#define PageXLogRecPtrGet(val) \
	(((uint64) (val).xlogid << 32) | (val).xrecoff)
#define PageXLogRecPtrSet(ptr, lsn) \
	((ptr).xlogid = (uint32) ((lsn) >> 32), (ptr).xrecoff = (uint32) (lsn))

| 105 | /* |
| 106 | * disk page organization |
| 107 | * |
| 108 | * space management information generic to any page |
| 109 | * |
| 110 | * pd_lsn - identifies xlog record for last change to this page. |
| 111 | * pd_checksum - page checksum, if set. |
| 112 | * pd_flags - flag bits. |
| 113 | * pd_lower - offset to start of free space. |
| 114 | * pd_upper - offset to end of free space. |
| 115 | * pd_special - offset to start of special space. |
| 116 | * pd_pagesize_version - size in bytes and page layout version number. |
| 117 | * pd_prune_xid - oldest XID among potentially prunable tuples on page. |
| 118 | * |
| 119 | * The LSN is used by the buffer manager to enforce the basic rule of WAL: |
| 120 | * "thou shalt write xlog before data". A dirty buffer cannot be dumped |
| 121 | * to disk until xlog has been flushed at least as far as the page's LSN. |
| 122 | * |
| 123 | * pd_checksum stores the page checksum, if it has been set for this page; |
| 124 | * zero is a valid value for a checksum. If a checksum is not in use then |
| 125 | * we leave the field unset. This will typically mean the field is zero |
| 126 | * though non-zero values may also be present if databases have been |
| 127 | * pg_upgraded from releases prior to 9.3, when the same byte offset was |
| 128 | * used to store the current timelineid when the page was last updated. |
| 129 | * Note that there is no indication on a page as to whether the checksum |
| 130 | * is valid or not, a deliberate design choice which avoids the problem |
| 131 | * of relying on the page contents to decide whether to verify it. Hence |
| 132 | * there are no flag bits relating to checksums. |
| 133 | * |
| 134 | * pd_prune_xid is a hint field that helps determine whether pruning will be |
| 135 | * useful. It is currently unused in index pages. |
| 136 | * |
| 137 | * The page version number and page size are packed together into a single |
| 138 | * uint16 field. This is for historical reasons: before PostgreSQL 7.3, |
| 139 | * there was no concept of a page version number, and doing it this way |
| 140 | * lets us pretend that pre-7.3 databases have page version number zero. |
| 141 | * We constrain page sizes to be multiples of 256, leaving the low eight |
| 142 | * bits available for a version number. |
| 143 | * |
| 144 | * Minimum possible page size is perhaps 64B to fit page header, opaque space |
| 145 | * and a minimal tuple; of course, in reality you want it much bigger, so |
| 146 | * the constraint on pagesize mod 256 is not an important restriction. |
| 147 | * On the high end, we can only support pages up to 32KB because lp_off/lp_len |
| 148 | * are 15 bits. |
| 149 | */ |
| 150 | |
| 151 | typedef struct |
| 152 | { |
| 153 | /* XXX LSN is member of *any* block, not only page-organized ones */ |
| 154 | PageXLogRecPtr ; /* LSN: next byte after last byte of xlog |
| 155 | * record for last change to this page */ |
| 156 | uint16 ; /* checksum */ |
| 157 | uint16 ; /* flag bits, see below */ |
| 158 | LocationIndex ; /* offset to start of free space */ |
| 159 | LocationIndex ; /* offset to end of free space */ |
| 160 | LocationIndex ; /* offset to start of special space */ |
| 161 | uint16 ; |
| 162 | TransactionId ; /* oldest prunable XID, or zero if none */ |
| 163 | ItemIdData [FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */ |
| 164 | } ; |
| 165 | |
| 166 | typedef PageHeaderData *; |
| 167 | |
/*
 * pd_flags contains the following flag bits.  Undefined bits are initialized
 * to zero and may be used in the future.
 *
 * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before
 * pd_lower.  This should be considered a hint rather than the truth, since
 * changes to it are not WAL-logged.
 *
 * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
 * page for its new tuple version; this suggests that a prune is needed.
 * Again, this is just a hint.
 *
 * PD_ALL_VISIBLE is set if all tuples on the page are visible to everyone.
 */
#define PD_HAS_FREE_LINES	0x0001	/* are there any unused line pointers? */
#define PD_PAGE_FULL		0x0002	/* not enough free space for new tuple? */
#define PD_ALL_VISIBLE		0x0004	/* all tuples on page are visible to
									 * everyone */

#define PD_VALID_FLAG_BITS	0x0007	/* OR of all valid pd_flags bits */

/*
 * Page layout version number 0 is for pre-7.3 Postgres releases.
 * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout.
 * Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
 * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
 * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
 *		added the pd_flags field (by stealing some bits from pd_tli),
 *		as well as adding the pd_prune_xid field (which enlarges the header).
 *
 * As of Release 9.3, the checksum version must also be considered when
 * handling pages.
 */
#define PG_PAGE_LAYOUT_VERSION		4
#define PG_DATA_CHECKSUM_VERSION	1

/* ----------------------------------------------------------------
 *						page support macros
 * ----------------------------------------------------------------
 */

/*
 * PageIsValid
 *		True iff page is valid.
 */
#define PageIsValid(page) PointerIsValid(page)

/*
 * SizeOfPageHeaderData
 *		Size of the fixed part of the page header, i.e. the offset of the
 *		line pointer array.
 *
 * line pointer(s) do not count as part of header
 */
#define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp))

/*
 * PageIsEmpty
 *		returns true iff no itemid has been allocated on the page
 */
#define PageIsEmpty(page) \
	(((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData)

/*
 * PageIsNew
 *		returns true iff page has not been initialized (by PageInit)
 */
#define PageIsNew(page) (((PageHeader) (page))->pd_upper == 0)

/*
 * PageGetItemId
 *		Returns an item identifier of a page.
 *
 * offsetNumber is 1-based: offset 1 maps to pd_linp[0].
 */
#define PageGetItemId(page, offsetNumber) \
	((ItemId) (&((PageHeader) (page))->pd_linp[(offsetNumber) - 1]))

/*
 * PageGetContents
 *		To be used in cases where the page does not contain line pointers.
 *
 * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
 * Now it is.  Beware of old code that might think the offset to the contents
 * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
 */
#define PageGetContents(page) \
	((char *) (page) + MAXALIGN(SizeOfPageHeaderData))

/* ----------------
 *		macros to access page size info
 * ----------------
 */

/*
 * PageSizeIsValid
 *		True iff the page size is valid.
 */
#define PageSizeIsValid(pageSize) ((pageSize) == BLCKSZ)

/*
 * PageGetPageSize
 *		Returns the page size of a page.
 *
 * this can only be called on a formatted page (unlike
 * BufferGetPageSize, which can be called on an unformatted page).
 * however, it can be called on a page that is not stored in a buffer.
 *
 * The size lives in the high byte of pd_pagesize_version.
 */
#define PageGetPageSize(page) \
	((Size) (((PageHeader) (page))->pd_pagesize_version & (uint16) 0xFF00))

/*
 * PageGetPageLayoutVersion
 *		Returns the page layout version of a page.
 *
 * The version lives in the low byte of pd_pagesize_version.
 */
#define PageGetPageLayoutVersion(page) \
	(((PageHeader) (page))->pd_pagesize_version & 0x00FF)

/*
 * PageSetPageSizeAndVersion
 *		Sets the page size and page layout version number of a page.
 *
 * We could support setting these two values separately, but there's
 * no real need for it at the moment.
 *
 * The two AssertMacro() checks require that "size" has no bits set in the
 * low byte and "version" has none set above it.
 */
#define PageSetPageSizeAndVersion(page, size, version) \
( \
	AssertMacro(((size) & 0xFF00) == (size)), \
	AssertMacro(((version) & 0x00FF) == (version)), \
	((PageHeader) (page))->pd_pagesize_version = (size) | (version) \
)

/* ----------------
 *		page special data macros
 * ----------------
 */
/*
 * PageGetSpecialSize
 *		Returns size of special space on a page.
 *
 * Computed as the page size minus pd_special.
 */
#define PageGetSpecialSize(page) \
	((uint16) (PageGetPageSize(page) - ((PageHeader)(page))->pd_special))

| 303 | /* |
| 304 | * Using assertions, validate that the page special pointer is OK. |
| 305 | * |
| 306 | * This is intended to catch use of the pointer before page initialization. |
| 307 | * It is implemented as a function due to the limitations of the MSVC |
| 308 | * compiler, which choked on doing all these tests within another macro. We |
| 309 | * return true so that MacroAssert() can be used while still getting the |
| 310 | * specifics from the macro failure within this function. |
| 311 | */ |
| 312 | static inline bool |
| 313 | PageValidateSpecialPointer(Page page) |
| 314 | { |
| 315 | Assert(PageIsValid(page)); |
| 316 | Assert(((PageHeader) (page))->pd_special <= BLCKSZ); |
| 317 | Assert(((PageHeader) (page))->pd_special >= SizeOfPageHeaderData); |
| 318 | |
| 319 | return true; |
| 320 | } |
| 321 | |
| 322 | /* |
| 323 | * PageGetSpecialPointer |
| 324 | * Returns pointer to special space on a page. |
| 325 | */ |
| 326 | #define PageGetSpecialPointer(page) \ |
| 327 | ( \ |
| 328 | AssertMacro(PageValidateSpecialPointer(page)), \ |
| 329 | (char *) ((char *) (page) + ((PageHeader) (page))->pd_special) \ |
| 330 | ) |
| 331 | |
/*
 * PageGetItem
 *		Retrieves an item on the given page.
 *
 * The result is the page address plus the offset stored in itemId.
 *
 * Note:
 *		This does not change the status of any of the resources passed.
 *		The semantics may change in the future.
 */
#define PageGetItem(page, itemId) \
( \
	AssertMacro(PageIsValid(page)), \
	AssertMacro(ItemIdHasStorage(itemId)), \
	(Item)(((char *)(page)) + ItemIdGetOffset(itemId)) \
)

/*
 * PageGetMaxOffsetNumber
 *		Returns the maximum offset number used by the given page.
 *		Since offset numbers are 1-based, this is also the number
 *		of items on the page.
 *
 *		NOTE: if the page is not initialized (pd_lower == 0), we must
 *		return zero to ensure sane behavior.  Accept double evaluation
 *		of the argument so that we can ensure this.
 */
#define PageGetMaxOffsetNumber(page) \
	(((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData ? 0 : \
	 ((((PageHeader) (page))->pd_lower - SizeOfPageHeaderData) \
	  / sizeof(ItemIdData)))

/*
 * Additional macros for access to page headers.  (Beware multiple evaluation
 * of the arguments!)
 */

/* Read or store the page LSN kept in pd_lsn. */
#define PageGetLSN(page) \
	PageXLogRecPtrGet(((PageHeader) (page))->pd_lsn)
#define PageSetLSN(page, lsn) \
	PageXLogRecPtrSet(((PageHeader) (page))->pd_lsn, lsn)

/* Test, set and clear the PD_HAS_FREE_LINES bit of pd_flags. */
#define PageHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags & PD_HAS_FREE_LINES)
#define PageSetHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags |= PD_HAS_FREE_LINES)
#define PageClearHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_HAS_FREE_LINES)

/* Test, set and clear the PD_PAGE_FULL bit of pd_flags. */
#define PageIsFull(page) \
	(((PageHeader) (page))->pd_flags & PD_PAGE_FULL)
#define PageSetFull(page) \
	(((PageHeader) (page))->pd_flags |= PD_PAGE_FULL)
#define PageClearFull(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_PAGE_FULL)

/* Test, set and clear the PD_ALL_VISIBLE bit of pd_flags. */
#define PageIsAllVisible(page) \
	(((PageHeader) (page))->pd_flags & PD_ALL_VISIBLE)
#define PageSetAllVisible(page) \
	(((PageHeader) (page))->pd_flags |= PD_ALL_VISIBLE)
#define PageClearAllVisible(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)

/*
 * PageIsPrunable
 *		True iff pd_prune_xid is valid and precedes oldestxmin.
 */
#define PageIsPrunable(page, oldestxmin) \
( \
	AssertMacro(TransactionIdIsNormal(oldestxmin)), \
	TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) && \
	TransactionIdPrecedes(((PageHeader) (page))->pd_prune_xid, oldestxmin) \
)

/*
 * PageSetPrunable
 *		Store xid in pd_prune_xid if the field is currently invalid or if
 *		xid precedes the value already stored there.
 */
#define PageSetPrunable(page, xid) \
do { \
	Assert(TransactionIdIsNormal(xid)); \
	if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
		TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
		((PageHeader) (page))->pd_prune_xid = (xid); \
} while (0)

/*
 * PageClearPrunable
 *		Reset pd_prune_xid to InvalidTransactionId.
 */
#define PageClearPrunable(page) \
	(((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)


/* ----------------------------------------------------------------
 *		extern declarations
 * ----------------------------------------------------------------
 */

/* Flag bits for the "flags" argument of PageAddItemExtended(). */
#define PAI_OVERWRITE			(1 << 0)
#define PAI_IS_HEAP				(1 << 1)

/*
 * PageAddItem
 *		Wrapper around PageAddItemExtended() that turns the two boolean
 *		arguments "overwrite" and "is_heap" into the PAI_OVERWRITE and
 *		PAI_IS_HEAP flag bits.
 */
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
	PageAddItemExtended(page, item, size, offsetNumber, \
						((overwrite) ? PAI_OVERWRITE : 0) | \
						((is_heap) ? PAI_IS_HEAP : 0))

| 421 | extern void PageInit(Page page, Size pageSize, Size specialSize); |
| 422 | extern bool PageIsVerified(Page page, BlockNumber blkno); |
| 423 | extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size, |
| 424 | OffsetNumber offsetNumber, int flags); |
| 425 | extern Page PageGetTempPage(Page page); |
| 426 | extern Page PageGetTempPageCopy(Page page); |
| 427 | extern Page PageGetTempPageCopySpecial(Page page); |
| 428 | extern void (Page tempPage, Page oldPage); |
| 429 | extern void (Page page); |
| 430 | extern Size PageGetFreeSpace(Page page); |
| 431 | extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups); |
| 432 | extern Size PageGetExactFreeSpace(Page page); |
| 433 | extern Size PageGetHeapFreeSpace(Page page); |
| 434 | extern void PageIndexTupleDelete(Page page, OffsetNumber offset); |
| 435 | extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems); |
| 436 | extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offset); |
| 437 | extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, |
| 438 | Item newtup, Size newsize); |
| 439 | extern char *PageSetChecksumCopy(Page page, BlockNumber blkno); |
| 440 | extern void PageSetChecksumInplace(Page page, BlockNumber blkno); |
| 441 | |
| 442 | #endif /* BUFPAGE_H */ |
| 443 | |