/*-------------------------------------------------------------------------
 *
 * bufpage.h
 *	  Standard POSTGRES buffer page definitions.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/storage/bufpage.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef BUFPAGE_H
#define BUFPAGE_H

#include "access/xlogdefs.h"
#include "storage/block.h"
#include "storage/item.h"
#include "storage/off.h"

/*
 * A postgres disk page is an abstraction layered on top of a postgres
 * disk block (which is simply a unit of i/o, see block.h).
 *
 * specifically, while a disk block can be unformatted, a postgres
 * disk page is always a slotted page of the form:
 *
 * +----------------+---------------------------------+
 * | PageHeaderData | linp1 linp2 linp3 ...           |
 * +-----------+----+---------------------------------+
 * | ... linpN |                                      |
 * +-----------+--------------------------------------+
 * |           ^ pd_lower                             |
 * |                                                  |
 * |             v pd_upper                           |
 * +-------------+------------------------------------+
 * |             | tupleN ...                         |
 * +-------------+------------------+-----------------+
 * |       ... tuple3 tuple2 tuple1 | "special space" |
 * +--------------------------------+-----------------+
 *                                  ^ pd_special
 *
 * a page is full when nothing can be added between pd_lower and
 * pd_upper.
 *
 * all blocks written out by an access method must be disk pages.
 *
 * EXCEPTIONS:
 *
 * obviously, a page is not formatted before it is initialized by
 * a call to PageInit.
 *
 * NOTES:
 *
 * linp1..N form an ItemId (line pointer) array.  ItemPointers point
 * to a physical block number and a logical offset (line pointer
 * number) within that block/page.  Note that OffsetNumbers
 * conventionally start at 1, not 0.
 *
 * tuple1..N are added "backwards" on the page.  Since an ItemPointer
 * offset is used to access an ItemId entry rather than an actual
 * byte-offset position, tuples can be physically shuffled on a page
 * whenever the need arises.  This indirection also keeps crash recovery
 * relatively simple, because the low-level details of page space
 * management can be controlled by standard buffer page code during
 * logging, and during recovery.
 *
 * AM-generic per-page information is kept in PageHeaderData.
 *
 * AM-specific per-page data (if any) is kept in the area marked "special
 * space"; each AM has an "opaque" structure defined somewhere that is
 * stored as the page trailer.  an access method should always
 * initialize its pages with PageInit and then set its own opaque
 * fields.
 */

78typedef Pointer Page;
79
80
81/*
82 * location (byte offset) within a page.
83 *
84 * note that this is actually limited to 2^15 because we have limited
85 * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h).
86 */
87typedef uint16 LocationIndex;
88
89
90/*
91 * For historical reasons, the 64-bit LSN value is stored as two 32-bit
92 * values.
93 */
94typedef struct
95{
96 uint32 xlogid; /* high bits */
97 uint32 xrecoff; /* low bits */
98} PageXLogRecPtr;
99
/*
 * PageXLogRecPtrGet
 *		Reassembles the 64-bit LSN from its two 32-bit halves.
 *
 * PageXLogRecPtrSet
 *		Splits a 64-bit LSN into the high (xlogid) and low (xrecoff) halves
 *		of "ptr".
 *
 * Both macros evaluate their first argument twice.
 */
#define PageXLogRecPtrGet(val) \
	((uint64) (val).xlogid << 32 | (val).xrecoff)
#define PageXLogRecPtrSet(ptr, lsn) \
	((ptr).xlogid = (uint32) ((lsn) >> 32), (ptr).xrecoff = (uint32) (lsn))

105/*
106 * disk page organization
107 *
108 * space management information generic to any page
109 *
110 * pd_lsn - identifies xlog record for last change to this page.
111 * pd_checksum - page checksum, if set.
112 * pd_flags - flag bits.
113 * pd_lower - offset to start of free space.
114 * pd_upper - offset to end of free space.
115 * pd_special - offset to start of special space.
116 * pd_pagesize_version - size in bytes and page layout version number.
117 * pd_prune_xid - oldest XID among potentially prunable tuples on page.
118 *
119 * The LSN is used by the buffer manager to enforce the basic rule of WAL:
120 * "thou shalt write xlog before data". A dirty buffer cannot be dumped
121 * to disk until xlog has been flushed at least as far as the page's LSN.
122 *
123 * pd_checksum stores the page checksum, if it has been set for this page;
124 * zero is a valid value for a checksum. If a checksum is not in use then
125 * we leave the field unset. This will typically mean the field is zero
126 * though non-zero values may also be present if databases have been
127 * pg_upgraded from releases prior to 9.3, when the same byte offset was
128 * used to store the current timelineid when the page was last updated.
129 * Note that there is no indication on a page as to whether the checksum
130 * is valid or not, a deliberate design choice which avoids the problem
131 * of relying on the page contents to decide whether to verify it. Hence
132 * there are no flag bits relating to checksums.
133 *
134 * pd_prune_xid is a hint field that helps determine whether pruning will be
135 * useful. It is currently unused in index pages.
136 *
137 * The page version number and page size are packed together into a single
138 * uint16 field. This is for historical reasons: before PostgreSQL 7.3,
139 * there was no concept of a page version number, and doing it this way
140 * lets us pretend that pre-7.3 databases have page version number zero.
141 * We constrain page sizes to be multiples of 256, leaving the low eight
142 * bits available for a version number.
143 *
144 * Minimum possible page size is perhaps 64B to fit page header, opaque space
145 * and a minimal tuple; of course, in reality you want it much bigger, so
146 * the constraint on pagesize mod 256 is not an important restriction.
147 * On the high end, we can only support pages up to 32KB because lp_off/lp_len
148 * are 15 bits.
149 */
150
151typedef struct PageHeaderData
152{
153 /* XXX LSN is member of *any* block, not only page-organized ones */
154 PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
155 * record for last change to this page */
156 uint16 pd_checksum; /* checksum */
157 uint16 pd_flags; /* flag bits, see below */
158 LocationIndex pd_lower; /* offset to start of free space */
159 LocationIndex pd_upper; /* offset to end of free space */
160 LocationIndex pd_special; /* offset to start of special space */
161 uint16 pd_pagesize_version;
162 TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
163 ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
164} PageHeaderData;
165
166typedef PageHeaderData *PageHeader;
167
/*
 * pd_flags contains the following flag bits.  Undefined bits are initialized
 * to zero and may be used in the future.
 *
 * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before
 * pd_lower.  This should be considered a hint rather than the truth, since
 * changes to it are not WAL-logged.
 *
 * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
 * page for its new tuple version; this suggests that a prune is needed.
 * Again, this is just a hint.
 *
 * PD_ALL_VISIBLE is set if all tuples on the page are visible to everyone.
 *
 * PD_VALID_FLAG_BITS must be kept equal to the OR of all the bits above.
 */
#define PD_HAS_FREE_LINES	0x0001	/* are there any unused line pointers? */
#define PD_PAGE_FULL		0x0002	/* not enough free space for new tuple? */
#define PD_ALL_VISIBLE		0x0004	/* all tuples on page are visible to
									 * everyone */

#define PD_VALID_FLAG_BITS	0x0007	/* OR of all valid pd_flags bits */

/*
 * Page layout version number 0 is for pre-7.3 Postgres releases.
 * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout.
 * Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
 * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
 * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
 *		added the pd_flags field (by stealing some bits from pd_tli),
 *		as well as adding the pd_prune_xid field (which enlarges the header).
 *
 * As of Release 9.3, the checksum version must also be considered when
 * handling pages.
 */
#define PG_PAGE_LAYOUT_VERSION		4
#define PG_DATA_CHECKSUM_VERSION	1

/* ----------------------------------------------------------------
 *						page support macros
 * ----------------------------------------------------------------
 */

/*
 * PageIsValid
 *		True iff page is valid.
 *
 * This only applies PointerIsValid to the page pointer; it does not inspect
 * the page contents.
 */
#define PageIsValid(page) PointerIsValid(page)

/*
 * SizeOfPageHeaderData
 *		Size of the fixed part of the page header.
 *
 * line pointer(s) do not count as part of header, so this is the offset of
 * the pd_linp array rather than sizeof(PageHeaderData).
 */
#define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp))

/*
 * PageIsEmpty
 *		returns true iff no itemid has been allocated on the page
 *
 * That is, pd_lower does not extend past the fixed page header.
 */
#define PageIsEmpty(page) \
	(((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData)

/*
 * PageIsNew
 *		returns true iff page has not been initialized (by PageInit)
 *
 * The test is simply whether pd_upper is still zero.
 */
#define PageIsNew(page) (((PageHeader) (page))->pd_upper == 0)

/*
 * PageGetItemId
 *		Returns an item identifier of a page.
 *
 * offsetNumber is 1-based: offset 1 maps to pd_linp[0].  No range check is
 * made here.
 */
#define PageGetItemId(page, offsetNumber) \
	((ItemId) (&((PageHeader) (page))->pd_linp[(offsetNumber) - 1]))

/*
 * PageGetContents
 *		To be used in cases where the page does not contain line pointers.
 *
 * Returns a char pointer to the first MAXALIGN'd byte after the fixed page
 * header.
 *
 * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
 * Now it is.  Beware of old code that might think the offset to the contents
 * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
 */
#define PageGetContents(page) \
	((char *) (page) + MAXALIGN(SizeOfPageHeaderData))

/* ----------------
 *		macros to access page size info
 * ----------------
 */

/*
 * PageSizeIsValid
 *		True iff the page size is valid.
 *
 * The only valid page size is BLCKSZ.
 */
#define PageSizeIsValid(pageSize) ((pageSize) == BLCKSZ)

/*
 * PageGetPageSize
 *		Returns the page size of a page.
 *
 * The size is the high byte of pd_pagesize_version; the low byte (layout
 * version) is masked off.
 *
 * this can only be called on a formatted page (unlike
 * BufferGetPageSize, which can be called on an unformatted page).
 * however, it can be called on a page that is not stored in a buffer.
 */
#define PageGetPageSize(page) \
	((Size) (((PageHeader) (page))->pd_pagesize_version & (uint16) 0xFF00))

/*
 * PageGetPageLayoutVersion
 *		Returns the page layout version of a page.
 *
 * The version is the low byte of pd_pagesize_version.
 */
#define PageGetPageLayoutVersion(page) \
	(((PageHeader) (page))->pd_pagesize_version & 0x00FF)

/*
 * PageSetPageSizeAndVersion
 *		Sets the page size and page layout version number of a page.
 *
 * The assertions check that "size" has no bits outside the high byte and
 * "version" has none outside the low byte, so that OR-ing them is lossless.
 * Arguments are evaluated more than once.
 *
 * We could support setting these two values separately, but there's
 * no real need for it at the moment.
 */
#define PageSetPageSizeAndVersion(page, size, version) \
( \
	AssertMacro(((size) & 0xFF00) == (size)), \
	AssertMacro(((version) & 0x00FF) == (version)), \
	((PageHeader) (page))->pd_pagesize_version = (size) | (version) \
)

/* ----------------
 *		page special data macros
 * ----------------
 */
/*
 * PageGetSpecialSize
 *		Returns size of special space on a page.
 *
 * Computed as the page size minus pd_special, truncated to uint16.
 */
#define PageGetSpecialSize(page) \
	((uint16) (PageGetPageSize(page) - ((PageHeader)(page))->pd_special))

303/*
304 * Using assertions, validate that the page special pointer is OK.
305 *
306 * This is intended to catch use of the pointer before page initialization.
307 * It is implemented as a function due to the limitations of the MSVC
308 * compiler, which choked on doing all these tests within another macro. We
309 * return true so that MacroAssert() can be used while still getting the
310 * specifics from the macro failure within this function.
311 */
312static inline bool
313PageValidateSpecialPointer(Page page)
314{
315 Assert(PageIsValid(page));
316 Assert(((PageHeader) (page))->pd_special <= BLCKSZ);
317 Assert(((PageHeader) (page))->pd_special >= SizeOfPageHeaderData);
318
319 return true;
320}
321
/*
 * PageGetSpecialPointer
 *		Returns pointer to special space on a page.
 *
 * The result is the page address plus pd_special, as a char pointer.  The
 * page argument is evaluated more than once.
 */
#define PageGetSpecialPointer(page) \
( \
	AssertMacro(PageValidateSpecialPointer(page)), \
	(char *) ((char *) (page) + ((PageHeader) (page))->pd_special) \
)

/*
 * PageGetItem
 *		Retrieves an item on the given page.
 *
 * The item address is the page address plus the byte offset stored in the
 * line pointer; the line pointer must have storage.
 *
 * Note:
 *		This does not change the status of any of the resources passed.
 *		The semantics may change in the future.
 */
#define PageGetItem(page, itemId) \
( \
	AssertMacro(PageIsValid(page)), \
	AssertMacro(ItemIdHasStorage(itemId)), \
	(Item)(((char *)(page)) + ItemIdGetOffset(itemId)) \
)

/*
 * PageGetMaxOffsetNumber
 *		Returns the maximum offset number used by the given page.
 *		Since offset numbers are 1-based, this is also the number
 *		of items on the page.
 *
 *		NOTE: if the page is not initialized (pd_lower == 0), we must
 *		return zero to ensure sane behavior.  Accept double evaluation
 *		of the argument so that we can ensure this.
 */
#define PageGetMaxOffsetNumber(page) \
	(((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData ? 0 : \
	 ((((PageHeader) (page))->pd_lower - SizeOfPageHeaderData) \
	  / sizeof(ItemIdData)))

/*
 * Additional macros for access to page headers. (Beware multiple evaluation
 * of the arguments!)
 */

/* Read / write the page's pd_lsn as a single 64-bit value. */
#define PageGetLSN(page) \
	PageXLogRecPtrGet(((PageHeader) (page))->pd_lsn)
#define PageSetLSN(page, lsn) \
	PageXLogRecPtrSet(((PageHeader) (page))->pd_lsn, lsn)

/*
 * Test, set and clear the PD_HAS_FREE_LINES bit of pd_flags.  The test macro
 * yields the masked bit (zero or nonzero), not a normalized boolean.
 */
#define PageHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags & PD_HAS_FREE_LINES)
#define PageSetHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags |= PD_HAS_FREE_LINES)
#define PageClearHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_HAS_FREE_LINES)

/* Same, for the PD_PAGE_FULL bit. */
#define PageIsFull(page) \
	(((PageHeader) (page))->pd_flags & PD_PAGE_FULL)
#define PageSetFull(page) \
	(((PageHeader) (page))->pd_flags |= PD_PAGE_FULL)
#define PageClearFull(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_PAGE_FULL)

/* Same, for the PD_ALL_VISIBLE bit. */
#define PageIsAllVisible(page) \
	(((PageHeader) (page))->pd_flags & PD_ALL_VISIBLE)
#define PageSetAllVisible(page) \
	(((PageHeader) (page))->pd_flags |= PD_ALL_VISIBLE)
#define PageClearAllVisible(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)

/*
 * PageIsPrunable
 *		True iff pd_prune_xid is valid and precedes oldestxmin.
 *		oldestxmin must be a normal XID.
 */
#define PageIsPrunable(page, oldestxmin) \
( \
	AssertMacro(TransactionIdIsNormal(oldestxmin)), \
	TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) && \
	TransactionIdPrecedes(((PageHeader) (page))->pd_prune_xid, oldestxmin) \
)

/*
 * PageSetPrunable
 *		Lowers pd_prune_xid to xid: stores xid if the field is currently
 *		invalid or xid precedes the stored value.  xid must be a normal XID.
 */
#define PageSetPrunable(page, xid) \
do { \
	Assert(TransactionIdIsNormal(xid)); \
	if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
		TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
		((PageHeader) (page))->pd_prune_xid = (xid); \
} while (0)

/*
 * PageClearPrunable
 *		Resets pd_prune_xid to InvalidTransactionId.
 */
#define PageClearPrunable(page) \
	(((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)


/* ----------------------------------------------------------------
 *		extern declarations
 * ----------------------------------------------------------------
 */

/* Flag bits for the "flags" argument of PageAddItemExtended(). */
#define PAI_OVERWRITE			(1 << 0)
#define PAI_IS_HEAP				(1 << 1)

/*
 * PageAddItem
 *		Wrapper around PageAddItemExtended() that takes the two options as
 *		separate booleans and converts them into the PAI_* flag bits.
 */
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
	PageAddItemExtended(page, item, size, offsetNumber, \
						((overwrite) ? PAI_OVERWRITE : 0) | \
						((is_heap) ? PAI_IS_HEAP : 0))

421extern void PageInit(Page page, Size pageSize, Size specialSize);
422extern bool PageIsVerified(Page page, BlockNumber blkno);
423extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
424 OffsetNumber offsetNumber, int flags);
425extern Page PageGetTempPage(Page page);
426extern Page PageGetTempPageCopy(Page page);
427extern Page PageGetTempPageCopySpecial(Page page);
428extern void PageRestoreTempPage(Page tempPage, Page oldPage);
429extern void PageRepairFragmentation(Page page);
430extern Size PageGetFreeSpace(Page page);
431extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
432extern Size PageGetExactFreeSpace(Page page);
433extern Size PageGetHeapFreeSpace(Page page);
434extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
435extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
436extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offset);
437extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
438 Item newtup, Size newsize);
439extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
440extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
441
#endif							/* BUFPAGE_H */