1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * bufpage.h |
4 | * Standard POSTGRES buffer page definitions. |
5 | * |
6 | * |
7 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
8 | * Portions Copyright (c) 1994, Regents of the University of California |
9 | * |
10 | * src/include/storage/bufpage.h |
11 | * |
12 | *------------------------------------------------------------------------- |
13 | */ |
14 | #ifndef BUFPAGE_H |
15 | #define BUFPAGE_H |
16 | |
17 | #include "access/xlogdefs.h" |
18 | #include "storage/block.h" |
19 | #include "storage/item.h" |
20 | #include "storage/off.h" |
21 | |
22 | /* |
23 | * A postgres disk page is an abstraction layered on top of a postgres |
24 | * disk block (which is simply a unit of i/o, see block.h). |
25 | * |
26 | * specifically, while a disk block can be unformatted, a postgres |
27 | * disk page is always a slotted page of the form: |
28 | * |
29 | * +----------------+---------------------------------+ |
30 | * | PageHeaderData | linp1 linp2 linp3 ... | |
31 | * +-----------+----+---------------------------------+ |
32 | * | ... linpN | | |
33 | * +-----------+--------------------------------------+ |
34 | * | ^ pd_lower | |
35 | * | | |
36 | * | v pd_upper | |
37 | * +-------------+------------------------------------+ |
38 | * | | tupleN ... | |
39 | * +-------------+------------------+-----------------+ |
40 | * | ... tuple3 tuple2 tuple1 | "special space" | |
41 | * +--------------------------------+-----------------+ |
42 | * ^ pd_special |
43 | * |
44 | * a page is full when nothing can be added between pd_lower and |
45 | * pd_upper. |
46 | * |
47 | * all blocks written out by an access method must be disk pages. |
48 | * |
49 | * EXCEPTIONS: |
50 | * |
51 | * obviously, a page is not formatted before it is initialized by |
52 | * a call to PageInit. |
53 | * |
54 | * NOTES: |
55 | * |
56 | * linp1..N form an ItemId (line pointer) array. ItemPointers point |
57 | * to a physical block number and a logical offset (line pointer |
58 | * number) within that block/page. Note that OffsetNumbers |
59 | * conventionally start at 1, not 0. |
60 | * |
61 | * tuple1..N are added "backwards" on the page. Since an ItemPointer |
62 | * offset is used to access an ItemId entry rather than an actual |
63 | * byte-offset position, tuples can be physically shuffled on a page |
64 | * whenever the need arises. This indirection also keeps crash recovery |
65 | * relatively simple, because the low-level details of page space |
66 | * management can be controlled by standard buffer page code during |
67 | * logging, and during recovery. |
68 | * |
69 | * AM-generic per-page information is kept in PageHeaderData. |
70 | * |
71 | * AM-specific per-page data (if any) is kept in the area marked "special |
72 | * space"; each AM has an "opaque" structure defined somewhere that is |
73 | * stored as the page trailer. an access method should always |
74 | * initialize its pages with PageInit and then set its own opaque |
75 | * fields. |
76 | */ |
77 | |
78 | typedef Pointer Page; |
79 | |
80 | |
81 | /* |
82 | * location (byte offset) within a page. |
83 | * |
84 | * note that this is actually limited to 2^15 because we have limited |
85 | * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h). |
86 | */ |
87 | typedef uint16 LocationIndex; |
88 | |
89 | |
90 | /* |
91 | * For historical reasons, the 64-bit LSN value is stored as two 32-bit |
92 | * values. |
93 | */ |
94 | typedef struct |
95 | { |
96 | uint32 xlogid; /* high 32 bits of the 64-bit LSN */ |
97 | uint32 xrecoff; /* low 32 bits of the 64-bit LSN */ |
98 | } PageXLogRecPtr; |
99 | /* Pack/unpack a 64-bit LSN; both macros evaluate their arguments twice. */ |
100 | #define PageXLogRecPtrGet(val) \ |
101 | ((uint64) (val).xlogid << 32 | (val).xrecoff) |
102 | #define PageXLogRecPtrSet(ptr, lsn) \ |
103 | ((ptr).xlogid = (uint32) ((lsn) >> 32), (ptr).xrecoff = (uint32) (lsn)) |
104 | |
105 | /* |
106 | * disk page organization |
107 | * |
108 | * space management information generic to any page |
109 | * |
110 | * pd_lsn - identifies xlog record for last change to this page. |
111 | * pd_checksum - page checksum, if set. |
112 | * pd_flags - flag bits. |
113 | * pd_lower - offset to start of free space. |
114 | * pd_upper - offset to end of free space. |
115 | * pd_special - offset to start of special space. |
116 | * pd_pagesize_version - size in bytes and page layout version number. |
117 | * pd_prune_xid - oldest XID among potentially prunable tuples on page. |
118 | * |
119 | * The LSN is used by the buffer manager to enforce the basic rule of WAL: |
120 | * "thou shalt write xlog before data". A dirty buffer cannot be dumped |
121 | * to disk until xlog has been flushed at least as far as the page's LSN. |
122 | * |
123 | * pd_checksum stores the page checksum, if it has been set for this page; |
124 | * zero is a valid value for a checksum. If a checksum is not in use then |
125 | * we leave the field unset. This will typically mean the field is zero |
126 | * though non-zero values may also be present if databases have been |
127 | * pg_upgraded from releases prior to 9.3, when the same byte offset was |
128 | * used to store the current timelineid when the page was last updated. |
129 | * Note that there is no indication on a page as to whether the checksum |
130 | * is valid or not, a deliberate design choice which avoids the problem |
131 | * of relying on the page contents to decide whether to verify it. Hence |
132 | * there are no flag bits relating to checksums. |
133 | * |
134 | * pd_prune_xid is a hint field that helps determine whether pruning will be |
135 | * useful. It is currently unused in index pages. |
136 | * |
137 | * The page version number and page size are packed together into a single |
138 | * uint16 field. This is for historical reasons: before PostgreSQL 7.3, |
139 | * there was no concept of a page version number, and doing it this way |
140 | * lets us pretend that pre-7.3 databases have page version number zero. |
141 | * We constrain page sizes to be multiples of 256, leaving the low eight |
142 | * bits available for a version number. |
143 | * |
144 | * Minimum possible page size is perhaps 64B to fit page header, opaque space |
145 | * and a minimal tuple; of course, in reality you want it much bigger, so |
146 | * the constraint on pagesize mod 256 is not an important restriction. |
147 | * On the high end, we can only support pages up to 32KB because lp_off/lp_len |
148 | * are 15 bits. |
149 | */ |
150 | |
151 | typedef struct PageHeaderData |
152 | { |
153 | /* XXX LSN is member of *any* block, not only page-organized ones */ |
154 | PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog |
155 | * record for last change to this page */ |
156 | uint16 pd_checksum; /* checksum */ |
157 | uint16 pd_flags; /* flag bits, see below */ |
158 | LocationIndex pd_lower; /* offset to start of free space */ |
159 | LocationIndex pd_upper; /* offset to end of free space */ |
160 | LocationIndex pd_special; /* offset to start of special space */ |
161 | uint16 pd_pagesize_version; /* page size and layout version, packed */ |
162 | TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */ |
163 | ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */ |
164 | } PageHeaderData; |
165 | |
166 | typedef PageHeaderData *PageHeader; |
167 | |
168 | /* |
169 | * pd_flags contains the following flag bits. Undefined bits are initialized |
170 | * to zero and may be used in the future. |
171 | * |
172 | * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before |
173 | * pd_lower. This should be considered a hint rather than the truth, since |
174 | * changes to it are not WAL-logged. |
175 | * |
176 | * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the |
177 | * page for its new tuple version; this suggests that a prune is needed. |
178 | * Again, this is just a hint. |
179 | */ |
180 | #define PD_HAS_FREE_LINES 0x0001 /* are there any unused line pointers? */ |
181 | #define PD_PAGE_FULL 0x0002 /* not enough free space for new tuple? */ |
182 | #define PD_ALL_VISIBLE 0x0004 /* all tuples on page are visible to |
183 | * everyone */ |
184 | |
185 | #define PD_VALID_FLAG_BITS 0x0007 /* OR of all valid pd_flags bits */ |
186 | |
187 | /* |
188 | * Page layout version number 0 is for pre-7.3 Postgres releases. |
189 | * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout. |
190 | * Release 8.0 uses 2; it changed the HeapTupleHeader layout again. |
191 | * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits. |
192 | * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and |
193 | * added the pd_flags field (by stealing some bits from pd_tli), |
194 | * as well as adding the pd_prune_xid field (which enlarges the header). |
195 | * |
196 | * As of Release 9.3, the checksum version must also be considered when |
197 | * handling pages. |
198 | */ |
199 | #define PG_PAGE_LAYOUT_VERSION 4 |
200 | #define PG_DATA_CHECKSUM_VERSION 1 |
201 | |
202 | /* ---------------------------------------------------------------- |
203 | * page support macros |
204 | * ---------------------------------------------------------------- |
205 | */ |
206 | |
207 | /* |
208 | * PageIsValid |
209 | * True iff page is valid. |
210 | */ |
211 | #define PageIsValid(page) PointerIsValid(page) |
212 | |
213 | /* |
214 | * size of the fixed page header; line pointer(s) do not count as part of it |
215 | */ |
216 | #define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp)) |
217 | |
218 | /* |
219 | * PageIsEmpty |
220 | * returns true iff no itemid has been allocated on the page |
221 | */ |
222 | #define PageIsEmpty(page) \ |
223 | (((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData) |
224 | |
225 | /* |
226 | * PageIsNew |
227 | * returns true iff page has not been initialized (by PageInit) |
228 | */ |
229 | #define PageIsNew(page) (((PageHeader) (page))->pd_upper == 0) |
230 | |
231 | /* |
232 | * PageGetItemId |
233 | * Returns an item identifier of a page. |
234 | */ |
235 | #define PageGetItemId(page, offsetNumber) \ |
236 | ((ItemId) (&((PageHeader) (page))->pd_linp[(offsetNumber) - 1])) |
237 | |
238 | /* |
239 | * PageGetContents |
240 | * To be used in cases where the page does not contain line pointers. |
241 | * |
242 | * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result. |
243 | * Now it is. Beware of old code that might think the offset to the contents |
244 | * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData). |
245 | */ |
246 | #define PageGetContents(page) \ |
247 | ((char *) (page) + MAXALIGN(SizeOfPageHeaderData)) |
248 | |
249 | /* ---------------- |
250 | * macros to access page size info |
251 | * ---------------- |
252 | */ |
253 | |
254 | /* |
255 | * PageSizeIsValid |
256 | * True iff the page size is valid. |
257 | */ |
258 | #define PageSizeIsValid(pageSize) ((pageSize) == BLCKSZ) |
259 | |
260 | /* |
261 | * PageGetPageSize |
262 | * Returns the page size of a page. |
263 | * |
264 | * this can only be called on a formatted page (unlike |
265 | * BufferGetPageSize, which can be called on an unformatted page). |
266 | * however, it can be called on a page that is not stored in a buffer. |
267 | */ |
268 | #define PageGetPageSize(page) \ |
269 | ((Size) (((PageHeader) (page))->pd_pagesize_version & (uint16) 0xFF00)) |
270 | |
271 | /* |
272 | * PageGetPageLayoutVersion |
273 | * Returns the page layout version of a page. |
274 | */ |
275 | #define PageGetPageLayoutVersion(page) \ |
276 | (((PageHeader) (page))->pd_pagesize_version & 0x00FF) |
277 | |
278 | /* |
279 | * PageSetPageSizeAndVersion |
280 | * Sets the page size and page layout version number of a page. |
281 | * |
282 | * We could support setting these two values separately, but there's |
283 | * no real need for it at the moment. |
284 | */ |
285 | #define PageSetPageSizeAndVersion(page, size, version) \ |
286 | ( \ |
287 | AssertMacro(((size) & 0xFF00) == (size)), \ |
288 | AssertMacro(((version) & 0x00FF) == (version)), \ |
289 | ((PageHeader) (page))->pd_pagesize_version = (size) | (version) \ |
290 | ) |
291 | |
292 | /* ---------------- |
293 | * page special data macros |
294 | * ---------------- |
295 | */ |
296 | /* |
297 | * PageGetSpecialSize |
298 | * Returns size of special space on a page. |
299 | */ |
300 | #define PageGetSpecialSize(page) \ |
301 | ((uint16) (PageGetPageSize(page) - ((PageHeader)(page))->pd_special)) |
302 | |
303 | /* |
304 | * Using assertions, validate that the page special pointer is OK. |
305 | * |
306 | * This is intended to catch use of the pointer before page initialization. |
307 | * It is implemented as a function due to the limitations of the MSVC |
308 | * compiler, which choked on doing all these tests within another macro. We |
309 | * return true so that AssertMacro() can be used while still getting the |
310 | * specifics from the macro failure within this function. |
311 | */ |
312 | static inline bool |
313 | PageValidateSpecialPointer(Page page) |
314 | { |
315 | Assert(PageIsValid(page)); /* the page pointer itself must be valid */ |
316 | Assert(((PageHeader) (page))->pd_special <= BLCKSZ); /* not past the block end */ |
317 | Assert(((PageHeader) (page))->pd_special >= SizeOfPageHeaderData); /* not inside the header */ |
318 | /* unconditionally true, so the call can be wrapped in AssertMacro() */ |
319 | return true; |
320 | } |
321 | |
322 | /* |
323 | * PageGetSpecialPointer |
324 | * Returns pointer to special space on a page. |
325 | */ |
326 | #define PageGetSpecialPointer(page) \ |
327 | ( \ |
328 | AssertMacro(PageValidateSpecialPointer(page)), \ |
329 | (char *) ((char *) (page) + ((PageHeader) (page))->pd_special) \ |
330 | ) |
331 | |
332 | /* |
333 | * PageGetItem |
334 | * Retrieves an item on the given page. |
335 | * |
336 | * Note: |
337 | * This does not change the status of any of the resources passed. |
338 | * The semantics may change in the future. |
339 | */ |
340 | #define PageGetItem(page, itemId) \ |
341 | ( \ |
342 | AssertMacro(PageIsValid(page)), \ |
343 | AssertMacro(ItemIdHasStorage(itemId)), \ |
344 | (Item)(((char *)(page)) + ItemIdGetOffset(itemId)) \ |
345 | ) |
346 | |
347 | /* |
348 | * PageGetMaxOffsetNumber |
349 | * Returns the maximum offset number used by the given page. |
350 | * Since offset numbers are 1-based, this is also the number |
351 | * of items on the page. |
352 | * |
353 | * NOTE: if the page is not initialized (pd_lower == 0), we must |
354 | * return zero to ensure sane behavior. Accept double evaluation |
355 | * of the argument so that we can ensure this. |
356 | */ |
357 | #define PageGetMaxOffsetNumber(page) \ |
358 | (((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData ? 0 : \ |
359 | ((((PageHeader) (page))->pd_lower - SizeOfPageHeaderData) \ |
360 | / sizeof(ItemIdData))) |
361 | |
362 | /* |
363 | * Additional macros for access to page headers. (Beware multiple evaluation |
364 | * of the arguments!) PageGetLSN/PageSetLSN read and write the pd_lsn field. |
365 | */ |
366 | #define PageGetLSN(page) \ |
367 | PageXLogRecPtrGet(((PageHeader) (page))->pd_lsn) |
368 | #define PageSetLSN(page, lsn) \ |
369 | PageXLogRecPtrSet(((PageHeader) (page))->pd_lsn, lsn) |
370 | /* PD_HAS_FREE_LINES bit of pd_flags: test (nonzero if set), set, clear */ |
371 | #define PageHasFreeLinePointers(page) \ |
372 | (((PageHeader) (page))->pd_flags & PD_HAS_FREE_LINES) |
373 | #define PageSetHasFreeLinePointers(page) \ |
374 | (((PageHeader) (page))->pd_flags |= PD_HAS_FREE_LINES) |
375 | #define PageClearHasFreeLinePointers(page) \ |
376 | (((PageHeader) (page))->pd_flags &= ~PD_HAS_FREE_LINES) |
377 | /* PD_PAGE_FULL bit of pd_flags: test (nonzero if set), set, clear */ |
378 | #define PageIsFull(page) \ |
379 | (((PageHeader) (page))->pd_flags & PD_PAGE_FULL) |
380 | #define PageSetFull(page) \ |
381 | (((PageHeader) (page))->pd_flags |= PD_PAGE_FULL) |
382 | #define PageClearFull(page) \ |
383 | (((PageHeader) (page))->pd_flags &= ~PD_PAGE_FULL) |
384 | /* PD_ALL_VISIBLE bit of pd_flags: test (nonzero if set), set, clear */ |
385 | #define PageIsAllVisible(page) \ |
386 | (((PageHeader) (page))->pd_flags & PD_ALL_VISIBLE) |
387 | #define PageSetAllVisible(page) \ |
388 | (((PageHeader) (page))->pd_flags |= PD_ALL_VISIBLE) |
389 | #define PageClearAllVisible(page) \ |
390 | (((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE) |
391 | /* pd_prune_xid hint: PageSetPrunable stores xid only if unset or xid precedes it */ |
392 | #define PageIsPrunable(page, oldestxmin) \ |
393 | ( \ |
394 | AssertMacro(TransactionIdIsNormal(oldestxmin)), \ |
395 | TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) && \ |
396 | TransactionIdPrecedes(((PageHeader) (page))->pd_prune_xid, oldestxmin) \ |
397 | ) |
398 | #define PageSetPrunable(page, xid) \ |
399 | do { \ |
400 | Assert(TransactionIdIsNormal(xid)); \ |
401 | if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \ |
402 | TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \ |
403 | ((PageHeader) (page))->pd_prune_xid = (xid); \ |
404 | } while (0) |
405 | #define PageClearPrunable(page) \ |
406 | (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId) |
407 | |
408 | |
409 | /* ---------------------------------------------------------------- |
410 | * extern declarations |
411 | * ---------------------------------------------------------------- |
412 | */ |
413 | #define PAI_OVERWRITE (1 << 0) /* passed when PageAddItem's "overwrite" is true */ |
414 | #define PAI_IS_HEAP (1 << 1) /* passed when PageAddItem's "is_heap" is true */ |
415 | /* PageAddItem: maps its two boolean arguments onto PageAddItemExtended flags */ |
416 | #define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \ |
417 | PageAddItemExtended(page, item, size, offsetNumber, \ |
418 | ((overwrite) ? PAI_OVERWRITE : 0) | \ |
419 | ((is_heap) ? PAI_IS_HEAP : 0)) |
420 | |
421 | extern void PageInit(Page page, Size pageSize, Size specialSize); |
422 | extern bool PageIsVerified(Page page, BlockNumber blkno); |
423 | extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size, |
424 | OffsetNumber offsetNumber, int flags); |
425 | extern Page PageGetTempPage(Page page); |
426 | extern Page PageGetTempPageCopy(Page page); |
427 | extern Page PageGetTempPageCopySpecial(Page page); |
428 | extern void PageRestoreTempPage(Page tempPage, Page oldPage); |
429 | extern void PageRepairFragmentation(Page page); |
430 | extern Size PageGetFreeSpace(Page page); |
431 | extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups); |
432 | extern Size PageGetExactFreeSpace(Page page); |
433 | extern Size PageGetHeapFreeSpace(Page page); |
434 | extern void PageIndexTupleDelete(Page page, OffsetNumber offset); |
435 | extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems); |
436 | extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offset); |
437 | extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, |
438 | Item newtup, Size newsize); |
439 | extern char *PageSetChecksumCopy(Page page, BlockNumber blkno); |
440 | extern void PageSetChecksumInplace(Page page, BlockNumber blkno); |
441 | |
442 | #endif /* BUFPAGE_H */ |
443 | |