/*
 * xlogrecord.h
 *
 * Definitions for the WAL record format.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/xlogrecord.h
 */
| 11 | #ifndef XLOGRECORD_H |
| 12 | #define XLOGRECORD_H |
| 13 | |
| 14 | #include "access/rmgr.h" |
| 15 | #include "access/xlogdefs.h" |
| 16 | #include "port/pg_crc32c.h" |
| 17 | #include "storage/block.h" |
| 18 | #include "storage/relfilenode.h" |
| 19 | |
| 20 | /* |
| 21 | * The overall layout of an XLOG record is: |
| 22 | * Fixed-size header (XLogRecord struct) |
| 23 | * XLogRecordBlockHeader struct |
| 24 | * XLogRecordBlockHeader struct |
| 25 | * ... |
| 26 | * XLogRecordDataHeader[Short|Long] struct |
| 27 | * block data |
| 28 | * block data |
| 29 | * ... |
| 30 | * main data |
| 31 | * |
| 32 | * There can be zero or more XLogRecordBlockHeaders, and 0 or more bytes of |
| 33 | * rmgr-specific data not associated with a block. XLogRecord structs |
| 34 | * always start on MAXALIGN boundaries in the WAL files, but the rest of |
| 35 | * the fields are not aligned. |
| 36 | * |
| 37 | * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and |
| 38 | * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's |
| 39 | * used to distinguish between block references, and the main data structs. |
| 40 | */ |
| 41 | typedef struct XLogRecord |
| 42 | { |
| 43 | uint32 xl_tot_len; /* total len of entire record */ |
| 44 | TransactionId xl_xid; /* xact id */ |
| 45 | XLogRecPtr xl_prev; /* ptr to previous record in log */ |
| 46 | uint8 xl_info; /* flag bits, see below */ |
| 47 | RmgrId xl_rmid; /* resource manager for this record */ |
| 48 | /* 2 bytes of padding here, initialize to zero */ |
| 49 | pg_crc32c xl_crc; /* CRC for this record */ |
| 50 | |
| 51 | /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */ |
| 52 | |
| 53 | } XLogRecord; |
| 54 | |
| 55 | #define SizeOfXLogRecord (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32c)) |
| 56 | |
/*
 * The high 4 bits in xl_info may be used freely by rmgr.  The
 * XLR_SPECIAL_REL_UPDATE and XLR_CHECK_CONSISTENCY bits can be passed by
 * XLogInsert caller.  The rest are set internally by XLogInsert.
 */
#define XLR_INFO_MASK			0x0F
#define XLR_RMGR_INFO_MASK		0xF0

/*
 * If a WAL record modifies any relation files, in ways not covered by the
 * usual block references, this flag is set.  This is not used for anything
 * by PostgreSQL itself, but it allows external tools that read WAL and keep
 * track of modified blocks to recognize such special record types.
 */
#define XLR_SPECIAL_REL_UPDATE	0x01

/*
 * Enforces consistency checks of replayed WAL at recovery.  If enabled,
 * each record will log a full-page write for each block modified by the
 * record and will reuse it afterwards for consistency checks.  The caller
 * of XLogInsert can use this value if necessary, but if
 * wal_consistency_checking is enabled for a rmgr this is set unconditionally.
 */
#define XLR_CHECK_CONSISTENCY	0x02

| 82 | /* |
| 83 | * Header info for block data appended to an XLOG record. |
| 84 | * |
| 85 | * 'data_length' is the length of the rmgr-specific payload data associated |
| 86 | * with this block. It does not include the possible full page image, nor |
| 87 | * XLogRecordBlockHeader struct itself. |
| 88 | * |
| 89 | * Note that we don't attempt to align the XLogRecordBlockHeader struct! |
| 90 | * So, the struct must be copied to aligned local storage before use. |
| 91 | */ |
| 92 | typedef struct |
| 93 | { |
| 94 | uint8 ; /* block reference ID */ |
| 95 | uint8 ; /* fork within the relation, and flags */ |
| 96 | uint16 ; /* number of payload bytes (not including page |
| 97 | * image) */ |
| 98 | |
| 99 | /* If BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows */ |
| 100 | /* If BKPBLOCK_SAME_REL is not set, a RelFileNode follows */ |
| 101 | /* BlockNumber follows */ |
| 102 | } ; |
| 103 | |
| 104 | #define (offsetof(XLogRecordBlockHeader, data_length) + sizeof(uint16)) |
| 105 | |
| 106 | /* |
| 107 | * Additional header information when a full-page image is included |
| 108 | * (i.e. when BKPBLOCK_HAS_IMAGE is set). |
| 109 | * |
| 110 | * The XLOG code is aware that PG data pages usually contain an unused "hole" |
| 111 | * in the middle, which contains only zero bytes. Since we know that the |
| 112 | * "hole" is all zeros, we remove it from the stored data (and it's not counted |
| 113 | * in the XLOG record's CRC, either). Hence, the amount of block data actually |
| 114 | * present is (BLCKSZ - <length of "hole" bytes>). |
| 115 | * |
| 116 | * Additionally, when wal_compression is enabled, we will try to compress full |
| 117 | * page images using the PGLZ compression algorithm, after removing the "hole". |
| 118 | * This can reduce the WAL volume, but at some extra cost of CPU spent |
| 119 | * on the compression during WAL logging. In this case, since the "hole" |
| 120 | * length cannot be calculated by subtracting the number of page image bytes |
| 121 | * from BLCKSZ, basically it needs to be stored as an extra information. |
| 122 | * But when no "hole" exists, we can assume that the "hole" length is zero |
| 123 | * and no such an extra information needs to be stored. Note that |
| 124 | * the original version of page image is stored in WAL instead of the |
| 125 | * compressed one if the number of bytes saved by compression is less than |
| 126 | * the length of extra information. Hence, when a page image is successfully |
| 127 | * compressed, the amount of block data actually present is less than |
| 128 | * BLCKSZ - the length of "hole" bytes - the length of extra information. |
| 129 | */ |
| 130 | typedef struct |
| 131 | { |
| 132 | uint16 ; /* number of page image bytes */ |
| 133 | uint16 ; /* number of bytes before "hole" */ |
| 134 | uint8 ; /* flag bits, see below */ |
| 135 | |
| 136 | /* |
| 137 | * If BKPIMAGE_HAS_HOLE and BKPIMAGE_IS_COMPRESSED, an |
| 138 | * XLogRecordBlockCompressHeader struct follows. |
| 139 | */ |
| 140 | } ; |
| 141 | |
| 142 | #define \ |
| 143 | (offsetof(XLogRecordBlockImageHeader, bimg_info) + sizeof(uint8)) |
| 144 | |
/* Information stored in bimg_info */
#define BKPIMAGE_HAS_HOLE		0x01	/* page image has "hole" */
#define BKPIMAGE_IS_COMPRESSED	0x02	/* page image is compressed */
#define BKPIMAGE_APPLY			0x04	/* page image should be restored during
										 * replay */

| 151 | /* |
| 152 | * Extra header information used when page image has "hole" and |
| 153 | * is compressed. |
| 154 | */ |
| 155 | typedef struct |
| 156 | { |
| 157 | uint16 ; /* number of bytes in "hole" */ |
| 158 | } ; |
| 159 | |
| 160 | #define \ |
| 161 | sizeof(XLogRecordBlockCompressHeader) |
| 162 | |
/*
 * Maximum size of the header for a block reference.  This is used to size a
 * temporary buffer for constructing the header.
 *
 * It is the sum of every piece that can appear in a block reference header:
 * the block header itself, the optional image and compression headers, the
 * RelFileNode and the BlockNumber.
 *
 * NOTE(review): the macro name is not referenced anywhere else in this
 * header; confirm it against the code that sizes the temporary buffer.
 */
#define MaxSizeOfXLogRecordBlockHeader \
	(SizeOfXLogRecordBlockHeader + \
	 SizeOfXLogRecordBlockImageHeader + \
	 SizeOfXLogRecordBlockCompressHeader + \
	 sizeof(RelFileNode) + \
	 sizeof(BlockNumber))

/*
 * The fork number fits in the lower 4 bits in the fork_flags field.  The upper
 * bits are used for flags.
 */
#define BKPBLOCK_FORK_MASK	0x0F
#define BKPBLOCK_FLAG_MASK	0xF0
#define BKPBLOCK_HAS_IMAGE	0x10	/* block data is an XLogRecordBlockImage */
#define BKPBLOCK_HAS_DATA	0x20
#define BKPBLOCK_WILL_INIT	0x40	/* redo will re-init the page */
#define BKPBLOCK_SAME_REL	0x80	/* RelFileNode omitted, same as previous */

| 185 | /* |
| 186 | * XLogRecordDataHeaderShort/Long are used for the "main data" portion of |
| 187 | * the record. If the length of the data is less than 256 bytes, the short |
| 188 | * form is used, with a single byte to hold the length. Otherwise the long |
| 189 | * form is used. |
| 190 | * |
| 191 | * (These structs are currently not used in the code, they are here just for |
| 192 | * documentation purposes). |
| 193 | */ |
| 194 | typedef struct |
| 195 | { |
| 196 | uint8 ; /* XLR_BLOCK_ID_DATA_SHORT */ |
| 197 | uint8 ; /* number of payload bytes */ |
| 198 | } ; |
| 199 | |
| 200 | #define (sizeof(uint8) * 2) |
| 201 | |
| 202 | typedef struct |
| 203 | { |
| 204 | uint8 ; /* XLR_BLOCK_ID_DATA_LONG */ |
| 205 | /* followed by uint32 data_length, unaligned */ |
| 206 | } ; |
| 207 | |
| 208 | #define (sizeof(uint8) + sizeof(uint32)) |
| 209 | |
/*
 * Block IDs used to distinguish different kinds of record fragments.  Block
 * references are numbered from 0 to XLR_MAX_BLOCK_ID.  A rmgr is free to use
 * any ID number in that range (although you should stick to small numbers,
 * because the WAL machinery is optimized for that case).  A couple of ID
 * numbers are reserved to denote the "main" data portion of the record.
 *
 * The maximum is currently set at 32, quite arbitrarily.  Most records only
 * need a handful of block references, but there are a few exceptions that
 * need more.
 */
#define XLR_MAX_BLOCK_ID			32

/* Reserved IDs; all lie above XLR_MAX_BLOCK_ID and fit in the 'id' byte. */
#define XLR_BLOCK_ID_DATA_SHORT		255
#define XLR_BLOCK_ID_DATA_LONG		254
#define XLR_BLOCK_ID_ORIGIN			253

| 227 | #endif /* XLOGRECORD_H */ |
| 228 | |