1 | /* |
2 | * xlogrecord.h |
3 | * |
4 | * Definitions for the WAL record format. |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * Portions Copyright (c) 1994, Regents of the University of California |
8 | * |
9 | * src/include/access/xlogrecord.h |
10 | */ |
11 | #ifndef XLOGRECORD_H |
12 | #define XLOGRECORD_H |
13 | |
14 | #include "access/rmgr.h" |
15 | #include "access/xlogdefs.h" |
16 | #include "port/pg_crc32c.h" |
17 | #include "storage/block.h" |
18 | #include "storage/relfilenode.h" |
19 | |
20 | /* |
21 | * The overall layout of an XLOG record is: |
22 | * Fixed-size header (XLogRecord struct) |
23 | * XLogRecordBlockHeader struct |
24 | * XLogRecordBlockHeader struct |
25 | * ... |
26 | * XLogRecordDataHeader[Short|Long] struct |
27 | * block data |
28 | * block data |
29 | * ... |
30 | * main data |
31 | * |
32 | * There can be zero or more XLogRecordBlockHeaders, and 0 or more bytes of |
33 | * rmgr-specific data not associated with a block. XLogRecord structs |
34 | * always start on MAXALIGN boundaries in the WAL files, but the rest of |
35 | * the fields are not aligned. |
36 | * |
37 | * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and |
38 | * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's |
39 | * used to distinguish between block references and the main data structs. |
40 | */ |
41 | typedef struct XLogRecord |
42 | { |
43 | uint32 xl_tot_len; /* total length of the entire record */ |
44 | TransactionId xl_xid; /* transaction id */ |
45 | XLogRecPtr xl_prev; /* pointer to the previous record in the log */ |
46 | uint8 xl_info; /* flag bits: low 4 bits XLR_INFO_MASK, high 4 bits free for rmgr */ |
47 | RmgrId xl_rmid; /* resource manager for this record */ |
48 | /* 2 bytes of padding here, initialize to zero */ |
49 | pg_crc32c xl_crc; /* CRC for this record */ |
50 | |
51 | /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */ |
52 | |
53 | } XLogRecord; |
54 | /* Size of the fixed header through the end of xl_crc; padding after xl_crc is not counted. */ |
55 | #define SizeOfXLogRecord (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32c)) |
56 | |
57 | /* |
58 | * The high 4 bits in xl_info (XLR_RMGR_INFO_MASK) may be used freely by rmgr. |
59 | * Of the low 4 bits (XLR_INFO_MASK), XLR_SPECIAL_REL_UPDATE and XLR_CHECK_CONSISTENCY |
60 | * can be passed by the XLogInsert caller; the rest are set internally by XLogInsert. |
61 | */ |
62 | #define XLR_INFO_MASK 0x0F |
63 | #define XLR_RMGR_INFO_MASK 0xF0 |
64 | |
65 | /* |
66 | * If a WAL record modifies any relation files, in ways not covered by the |
67 | * usual block references, this flag is set. This is not used for anything |
68 | * by PostgreSQL itself, but it allows external tools that read WAL and keep |
69 | * track of modified blocks to recognize such special record types. |
70 | */ |
71 | #define XLR_SPECIAL_REL_UPDATE 0x01 |
72 | |
73 | /* |
74 | * Enforces consistency checks of replayed WAL at recovery. If enabled, |
75 | * each record will log a full-page write for each block modified by the |
76 | * record and will reuse it afterwards for consistency checks. The caller |
77 | * of XLogInsert can use this value if necessary, but if |
78 | * wal_consistency_checking is enabled for an rmgr, this is set unconditionally. |
79 | */ |
80 | #define XLR_CHECK_CONSISTENCY 0x02 |
81 | |
82 | /* |
83 | * Header info for block data appended to an XLOG record. |
84 | * |
85 | * 'data_length' is the length of the rmgr-specific payload data associated |
86 | * with this block. It does not include the possible full page image, nor |
87 | * XLogRecordBlockHeader struct itself. |
88 | * |
89 | * Note that we don't attempt to align the XLogRecordBlockHeader struct! |
90 | * So, the struct must be copied to aligned local storage before use. |
91 | */ |
92 | typedef struct XLogRecordBlockHeader |
93 | { |
94 | uint8 id; /* block reference ID */ |
95 | uint8 fork_flags; /* fork number in the low 4 bits, BKPBLOCK_* flags in the upper bits */ |
96 | uint16 data_length; /* number of payload bytes (not including page |
97 | * image) */ |
98 | |
99 | /* If BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows */ |
100 | /* If BKPBLOCK_SAME_REL is not set, a RelFileNode follows */ |
101 | /* BlockNumber follows */ |
102 | } XLogRecordBlockHeader; |
103 | |
104 | #define SizeOfXLogRecordBlockHeader (offsetof(XLogRecordBlockHeader, data_length) + sizeof(uint16)) |
105 | |
106 | /* |
107 | * Additional header information when a full-page image is included |
108 | * (i.e. when BKPBLOCK_HAS_IMAGE is set). |
109 | * |
110 | * The XLOG code is aware that PG data pages usually contain an unused "hole" |
111 | * in the middle, which contains only zero bytes. Since we know that the |
112 | * "hole" is all zeros, we remove it from the stored data (and it's not counted |
113 | * in the XLOG record's CRC, either). Hence, the amount of block data actually |
114 | * present is (BLCKSZ - <length of "hole" bytes>). |
115 | * |
116 | * Additionally, when wal_compression is enabled, we will try to compress full |
117 | * page images using the PGLZ compression algorithm, after removing the "hole". |
118 | * This can reduce the WAL volume, but at some extra cost of CPU spent |
119 | * on the compression during WAL logging. In this case, since the "hole" |
120 | * length cannot be calculated by subtracting the number of page image bytes |
121 | * from BLCKSZ, it needs to be stored as extra information. |
122 | * But when no "hole" exists, we can assume that the "hole" length is zero |
123 | * and no such extra information needs to be stored. Note that |
124 | * the original version of page image is stored in WAL instead of the |
125 | * compressed one if the number of bytes saved by compression is less than |
126 | * the length of extra information. Hence, when a page image is successfully |
127 | * compressed, the amount of block data actually present is less than |
128 | * BLCKSZ - the length of "hole" bytes - the length of extra information. |
129 | */ |
130 | typedef struct XLogRecordBlockImageHeader |
131 | { |
132 | uint16 length; /* number of page image bytes */ |
133 | uint16 hole_offset; /* number of bytes before "hole" */ |
134 | uint8 bimg_info; /* BKPIMAGE_* flag bits, see below */ |
135 | |
136 | /* |
137 | * If BKPIMAGE_HAS_HOLE and BKPIMAGE_IS_COMPRESSED, an |
138 | * XLogRecordBlockCompressHeader struct follows. |
139 | */ |
140 | } XLogRecordBlockImageHeader; |
141 | |
142 | #define SizeOfXLogRecordBlockImageHeader \ |
143 | (offsetof(XLogRecordBlockImageHeader, bimg_info) + sizeof(uint8)) |
144 | |
145 | /* Flag bits stored in XLogRecordBlockImageHeader.bimg_info */ |
146 | #define BKPIMAGE_HAS_HOLE 0x01 /* page image has "hole" */ |
147 | #define BKPIMAGE_IS_COMPRESSED 0x02 /* page image is compressed */ |
148 | #define BKPIMAGE_APPLY 0x04 /* page image should be restored during |
149 | * replay */ |
150 | |
151 | /* |
152 | * Extra header information used when page image has "hole" and |
153 | * is compressed. |
154 | */ |
155 | typedef struct XLogRecordBlockCompressHeader |
156 | { |
157 | uint16 hole_length; /* number of bytes in "hole" */ |
158 | } XLogRecordBlockCompressHeader; |
159 | |
160 | #define SizeOfXLogRecordBlockCompressHeader \ |
161 | sizeof(XLogRecordBlockCompressHeader) |
162 | |
163 | /* |
164 | * Maximum size of the header for a block reference. This is used to size a |
165 | * temporary buffer for constructing the header. |
166 | */ |
167 | #define MaxSizeOfXLogRecordBlockHeader \ |
168 | (SizeOfXLogRecordBlockHeader + \ |
169 | SizeOfXLogRecordBlockImageHeader + \ |
170 | SizeOfXLogRecordBlockCompressHeader + \ |
171 | sizeof(RelFileNode) + \ |
172 | sizeof(BlockNumber)) |
173 | |
174 | /* |
175 | * The fork number fits in the lower 4 bits in the fork_flags field. The upper |
176 | * bits are used for flags. |
177 | */ |
178 | #define BKPBLOCK_FORK_MASK 0x0F /* fork number bits of fork_flags */ |
179 | #define BKPBLOCK_FLAG_MASK 0xF0 /* flag bits of fork_flags */ |
180 | #define BKPBLOCK_HAS_IMAGE 0x10 /* block has a full-page image; XLogRecordBlockImageHeader follows */ |
181 | #define BKPBLOCK_HAS_DATA 0x20 /* NOTE(review): presumably set when the block has payload data (data_length) -- confirm */ |
182 | #define BKPBLOCK_WILL_INIT 0x40 /* redo will re-init the page */ |
183 | #define BKPBLOCK_SAME_REL 0x80 /* RelFileNode omitted, same as previous */ |
184 | |
185 | /* |
186 | * XLogRecordDataHeaderShort/Long are used for the "main data" portion of |
187 | * the record. If the length of the data is less than 256 bytes, the short |
188 | * form is used, with a single byte to hold the length. Otherwise the long |
189 | * form is used. |
190 | * |
191 | * (These structs are currently not used in the code, they are here just for |
192 | * documentation purposes). |
193 | */ |
194 | typedef struct XLogRecordDataHeaderShort |
195 | { |
196 | uint8 id; /* XLR_BLOCK_ID_DATA_SHORT */ |
197 | uint8 data_length; /* number of payload bytes */ |
198 | } XLogRecordDataHeaderShort; |
199 | |
200 | #define SizeOfXLogRecordDataHeaderShort (sizeof(uint8) * 2) |
201 | |
202 | typedef struct XLogRecordDataHeaderLong |
203 | { |
204 | uint8 id; /* XLR_BLOCK_ID_DATA_LONG */ |
205 | /* followed by uint32 data_length, unaligned */ |
206 | } XLogRecordDataHeaderLong; |
207 | |
208 | #define SizeOfXLogRecordDataHeaderLong (sizeof(uint8) + sizeof(uint32)) |
209 | |
210 | /* |
211 | * Block IDs used to distinguish different kinds of record fragments. Block |
212 | * references are numbered from 0 to XLR_MAX_BLOCK_ID. A rmgr is free to use |
213 | * any ID number in that range (although you should stick to small numbers, |
214 | * because the WAL machinery is optimized for that case). A couple of ID |
215 | * numbers are reserved to denote the "main" data portion of the record. |
216 | * |
217 | * The maximum is currently set at 32, quite arbitrarily. Most records only |
218 | * need a handful of block references, but there are a few exceptions that |
219 | * need more. |
220 | */ |
221 | #define XLR_MAX_BLOCK_ID 32 |
222 | |
223 | #define XLR_BLOCK_ID_DATA_SHORT 255 /* 'id' byte of XLogRecordDataHeaderShort */ |
224 | #define XLR_BLOCK_ID_DATA_LONG 254 /* 'id' byte of XLogRecordDataHeaderLong */ |
225 | #define XLR_BLOCK_ID_ORIGIN 253 /* NOTE(review): purpose not described in this header -- confirm */ |
226 | |
227 | #endif /* XLOGRECORD_H */ |
228 | |