/*
 * xlogrecord.h
 *
 * Definitions for the WAL record format.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/xlogrecord.h
 */
11#ifndef XLOGRECORD_H
12#define XLOGRECORD_H
13
14#include "access/rmgr.h"
15#include "access/xlogdefs.h"
16#include "port/pg_crc32c.h"
17#include "storage/block.h"
18#include "storage/relfilenode.h"
19
20/*
21 * The overall layout of an XLOG record is:
22 * Fixed-size header (XLogRecord struct)
23 * XLogRecordBlockHeader struct
24 * XLogRecordBlockHeader struct
25 * ...
26 * XLogRecordDataHeader[Short|Long] struct
27 * block data
28 * block data
29 * ...
30 * main data
31 *
32 * There can be zero or more XLogRecordBlockHeaders, and 0 or more bytes of
33 * rmgr-specific data not associated with a block. XLogRecord structs
34 * always start on MAXALIGN boundaries in the WAL files, but the rest of
35 * the fields are not aligned.
36 *
37 * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and
38 * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's
39 * used to distinguish between block references, and the main data structs.
40 */
41typedef struct XLogRecord
42{
43 uint32 xl_tot_len; /* total len of entire record */
44 TransactionId xl_xid; /* xact id */
45 XLogRecPtr xl_prev; /* ptr to previous record in log */
46 uint8 xl_info; /* flag bits, see below */
47 RmgrId xl_rmid; /* resource manager for this record */
48 /* 2 bytes of padding here, initialize to zero */
49 pg_crc32c xl_crc; /* CRC for this record */
50
51 /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */
52
53} XLogRecord;
54
55#define SizeOfXLogRecord (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32c))
56
/*
 * The high 4 bits in xl_info may be used freely by rmgr.  The
 * XLR_SPECIAL_REL_UPDATE and XLR_CHECK_CONSISTENCY bits can be passed by
 * the XLogInsert caller.  The rest are set internally by XLogInsert.
 */
#define XLR_INFO_MASK           0x0F
#define XLR_RMGR_INFO_MASK      0xF0

/*
 * If a WAL record modifies any relation files, in ways not covered by the
 * usual block references, this flag is set.  This is not used for anything
 * by PostgreSQL itself, but it allows external tools that read WAL and keep
 * track of modified blocks to recognize such special record types.
 */
#define XLR_SPECIAL_REL_UPDATE  0x01

/*
 * Enforces consistency checks of replayed WAL at recovery.  If enabled,
 * each record will log a full-page write for each block modified by the
 * record and will reuse it afterwards for consistency checks.  The caller
 * of XLogInsert can use this value if necessary, but if
 * wal_consistency_checking is enabled for a rmgr this is set unconditionally.
 */
#define XLR_CHECK_CONSISTENCY   0x02
81
82/*
83 * Header info for block data appended to an XLOG record.
84 *
85 * 'data_length' is the length of the rmgr-specific payload data associated
86 * with this block. It does not include the possible full page image, nor
87 * XLogRecordBlockHeader struct itself.
88 *
89 * Note that we don't attempt to align the XLogRecordBlockHeader struct!
90 * So, the struct must be copied to aligned local storage before use.
91 */
92typedef struct XLogRecordBlockHeader
93{
94 uint8 id; /* block reference ID */
95 uint8 fork_flags; /* fork within the relation, and flags */
96 uint16 data_length; /* number of payload bytes (not including page
97 * image) */
98
99 /* If BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows */
100 /* If BKPBLOCK_SAME_REL is not set, a RelFileNode follows */
101 /* BlockNumber follows */
102} XLogRecordBlockHeader;
103
104#define SizeOfXLogRecordBlockHeader (offsetof(XLogRecordBlockHeader, data_length) + sizeof(uint16))
105
106/*
107 * Additional header information when a full-page image is included
108 * (i.e. when BKPBLOCK_HAS_IMAGE is set).
109 *
110 * The XLOG code is aware that PG data pages usually contain an unused "hole"
111 * in the middle, which contains only zero bytes. Since we know that the
112 * "hole" is all zeros, we remove it from the stored data (and it's not counted
113 * in the XLOG record's CRC, either). Hence, the amount of block data actually
114 * present is (BLCKSZ - <length of "hole" bytes>).
115 *
116 * Additionally, when wal_compression is enabled, we will try to compress full
117 * page images using the PGLZ compression algorithm, after removing the "hole".
118 * This can reduce the WAL volume, but at some extra cost of CPU spent
119 * on the compression during WAL logging. In this case, since the "hole"
120 * length cannot be calculated by subtracting the number of page image bytes
121 * from BLCKSZ, basically it needs to be stored as an extra information.
122 * But when no "hole" exists, we can assume that the "hole" length is zero
123 * and no such an extra information needs to be stored. Note that
124 * the original version of page image is stored in WAL instead of the
125 * compressed one if the number of bytes saved by compression is less than
126 * the length of extra information. Hence, when a page image is successfully
127 * compressed, the amount of block data actually present is less than
128 * BLCKSZ - the length of "hole" bytes - the length of extra information.
129 */
130typedef struct XLogRecordBlockImageHeader
131{
132 uint16 length; /* number of page image bytes */
133 uint16 hole_offset; /* number of bytes before "hole" */
134 uint8 bimg_info; /* flag bits, see below */
135
136 /*
137 * If BKPIMAGE_HAS_HOLE and BKPIMAGE_IS_COMPRESSED, an
138 * XLogRecordBlockCompressHeader struct follows.
139 */
140} XLogRecordBlockImageHeader;
141
142#define SizeOfXLogRecordBlockImageHeader \
143 (offsetof(XLogRecordBlockImageHeader, bimg_info) + sizeof(uint8))
144
/* Information stored in bimg_info; each flag occupies its own bit */
#define BKPIMAGE_HAS_HOLE       0x01    /* page image has "hole" */
#define BKPIMAGE_IS_COMPRESSED  0x02    /* page image is compressed */
#define BKPIMAGE_APPLY          0x04    /* page image should be restored
                                         * during replay */
150
151/*
152 * Extra header information used when page image has "hole" and
153 * is compressed.
154 */
155typedef struct XLogRecordBlockCompressHeader
156{
157 uint16 hole_length; /* number of bytes in "hole" */
158} XLogRecordBlockCompressHeader;
159
160#define SizeOfXLogRecordBlockCompressHeader \
161 sizeof(XLogRecordBlockCompressHeader)
162
/*
 * Maximum size of the header for a block reference.  This is used to size a
 * temporary buffer for constructing the header.
 *
 * It is the sum of every piece that can accompany a block reference: the
 * fixed block header, the image header, the compress header, a RelFileNode
 * and a BlockNumber.
 */
#define MaxSizeOfXLogRecordBlockHeader \
    (SizeOfXLogRecordBlockHeader + \
     SizeOfXLogRecordBlockImageHeader + \
     SizeOfXLogRecordBlockCompressHeader + \
     sizeof(RelFileNode) + \
     sizeof(BlockNumber))
173
/*
 * The fork number fits in the lower 4 bits in the fork_flags field.  The upper
 * bits are used for flags.
 */
#define BKPBLOCK_FORK_MASK  0x0F
#define BKPBLOCK_FLAG_MASK  0xF0
#define BKPBLOCK_HAS_IMAGE  0x10    /* an XLogRecordBlockImageHeader follows
                                     * the block header */
#define BKPBLOCK_HAS_DATA   0x20    /* NOTE(review): presumably set when rmgr
                                     * payload data accompanies the block --
                                     * confirm against the insert code */
#define BKPBLOCK_WILL_INIT  0x40    /* redo will re-init the page */
#define BKPBLOCK_SAME_REL   0x80    /* RelFileNode omitted, same as previous */
184
185/*
186 * XLogRecordDataHeaderShort/Long are used for the "main data" portion of
187 * the record. If the length of the data is less than 256 bytes, the short
188 * form is used, with a single byte to hold the length. Otherwise the long
189 * form is used.
190 *
191 * (These structs are currently not used in the code, they are here just for
192 * documentation purposes).
193 */
194typedef struct XLogRecordDataHeaderShort
195{
196 uint8 id; /* XLR_BLOCK_ID_DATA_SHORT */
197 uint8 data_length; /* number of payload bytes */
198} XLogRecordDataHeaderShort;
199
200#define SizeOfXLogRecordDataHeaderShort (sizeof(uint8) * 2)
201
202typedef struct XLogRecordDataHeaderLong
203{
204 uint8 id; /* XLR_BLOCK_ID_DATA_LONG */
205 /* followed by uint32 data_length, unaligned */
206} XLogRecordDataHeaderLong;
207
208#define SizeOfXLogRecordDataHeaderLong (sizeof(uint8) + sizeof(uint32))
209
/*
 * Block IDs used to distinguish different kinds of record fragments.  Block
 * references are numbered from 0 to XLR_MAX_BLOCK_ID.  An rmgr is free to use
 * any ID number in that range (although you should stick to small numbers,
 * because the WAL machinery is optimized for that case).  A couple of ID
 * numbers are reserved to denote the "main" data portion of the record.
 *
 * The maximum is currently set at 32, quite arbitrarily.  Most records only
 * need a handful of block references, but there are a few exceptions that
 * need more.
 */
#define XLR_MAX_BLOCK_ID            32

/* Reserved IDs; all lie above XLR_MAX_BLOCK_ID */
#define XLR_BLOCK_ID_DATA_SHORT     255     /* id of XLogRecordDataHeaderShort */
#define XLR_BLOCK_ID_DATA_LONG      254     /* id of XLogRecordDataHeaderLong */
#define XLR_BLOCK_ID_ORIGIN         253     /* NOTE(review): presumably tags
                                             * origin information -- confirm */
226
227#endif /* XLOGRECORD_H */
228