| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * pg_control.h |
| 4 | * The system control file "pg_control" is not a heap relation. |
| 5 | * However, we define it here so that the format is documented. |
| 6 | * |
| 7 | * |
| 8 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 9 | * Portions Copyright (c) 1994, Regents of the University of California |
| 10 | * |
| 11 | * src/include/catalog/pg_control.h |
| 12 | * |
| 13 | *------------------------------------------------------------------------- |
| 14 | */ |
| 15 | #ifndef PG_CONTROL_H |
| 16 | #define PG_CONTROL_H |
| 17 | |
| 18 | #include "access/transam.h" |
| 19 | #include "access/xlogdefs.h" |
| 20 | #include "pgtime.h" /* for pg_time_t */ |
| 21 | #include "port/pg_crc32c.h" |
| 22 | |
| 23 | |
/*
 * Version identifier for this pg_control format.  This is the value kept in
 * ControlFileData.pg_control_version.
 */
#define PG_CONTROL_VERSION	1201

/* Length, in bytes, of ControlFileData.mock_authentication_nonce */
#define MOCK_AUTH_NONCE_LEN		32
| 29 | |
| 30 | /* |
| 31 | * Body of CheckPoint XLOG records. This is declared here because we keep |
| 32 | * a copy of the latest one in pg_control for possible disaster recovery. |
| 33 | * Changing this struct requires a PG_CONTROL_VERSION bump. |
| 34 | */ |
| 35 | typedef struct CheckPoint |
| 36 | { |
| 37 | XLogRecPtr redo; /* next RecPtr available when we began to |
| 38 | * create CheckPoint (i.e. REDO start point) */ |
| 39 | TimeLineID ThisTimeLineID; /* current TLI */ |
| 40 | TimeLineID PrevTimeLineID; /* previous TLI, if this record begins a new |
| 41 | * timeline (equals ThisTimeLineID otherwise) */ |
| 42 | bool fullPageWrites; /* current full_page_writes */ |
| 43 | FullTransactionId nextFullXid; /* next free full transaction ID */ |
| 44 | Oid nextOid; /* next free OID */ |
| 45 | MultiXactId nextMulti; /* next free MultiXactId */ |
| 46 | MultiXactOffset nextMultiOffset; /* next free MultiXact offset */ |
| 47 | TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */ |
| 48 | Oid oldestXidDB; /* database with minimum datfrozenxid */ |
| 49 | MultiXactId oldestMulti; /* cluster-wide minimum datminmxid */ |
| 50 | Oid oldestMultiDB; /* database with minimum datminmxid */ |
| 51 | pg_time_t time; /* time stamp of checkpoint */ |
| 52 | TransactionId oldestCommitTsXid; /* oldest Xid with valid commit |
| 53 | * timestamp */ |
| 54 | TransactionId newestCommitTsXid; /* newest Xid with valid commit |
| 55 | * timestamp */ |
| 56 | |
| 57 | /* |
| 58 | * Oldest XID still running. This is only needed to initialize hot standby |
| 59 | * mode from an online checkpoint, so we only bother calculating this for |
| 60 | * online checkpoints and only when wal_level is replica. Otherwise it's |
| 61 | * set to InvalidTransactionId. |
| 62 | */ |
| 63 | TransactionId oldestActiveXid; |
| 64 | } CheckPoint; |
| 65 | |
/*
 * XLOG info values for XLOG rmgr.
 *
 * Every value is a multiple of 0x10, i.e. the low four bits are always zero
 * and the codes are distinguished by the high four bits only.
 */
#define XLOG_CHECKPOINT_SHUTDOWN		0x00
#define XLOG_CHECKPOINT_ONLINE			0x10
#define XLOG_NOOP						0x20
#define XLOG_NEXTOID					0x30
#define XLOG_SWITCH						0x40
#define XLOG_BACKUP_END					0x50
#define XLOG_PARAMETER_CHANGE			0x60
#define XLOG_RESTORE_POINT				0x70
#define XLOG_FPW_CHANGE					0x80
#define XLOG_END_OF_RECOVERY			0x90
#define XLOG_FPI_FOR_HINT				0xA0
#define XLOG_FPI						0xB0
| 79 | |
| 80 | |
/*
 * System status indicator.  Note this is stored in pg_control; if you change
 * it, you must bump PG_CONTROL_VERSION.
 *
 * Because the numeric values are what gets stored, each enumerator is given
 * its value explicitly, so that inserting or reordering entries cannot
 * silently renumber the existing states.
 */
typedef enum DBState
{
	DB_STARTUP = 0,
	DB_SHUTDOWNED = 1,
	DB_SHUTDOWNED_IN_RECOVERY = 2,
	DB_SHUTDOWNING = 3,
	DB_IN_CRASH_RECOVERY = 4,
	DB_IN_ARCHIVE_RECOVERY = 5,
	DB_IN_PRODUCTION = 6
} DBState;
| 95 | |
| 96 | /* |
| 97 | * Contents of pg_control. |
| 98 | */ |
| 99 | |
| 100 | typedef struct ControlFileData |
| 101 | { |
| 102 | /* |
| 103 | * Unique system identifier --- to ensure we match up xlog files with the |
| 104 | * installation that produced them. |
| 105 | */ |
| 106 | uint64 system_identifier; |
| 107 | |
| 108 | /* |
| 109 | * Version identifier information. Keep these fields at the same offset, |
| 110 | * especially pg_control_version; they won't be real useful if they move |
| 111 | * around. (For historical reasons they must be 8 bytes into the file |
| 112 | * rather than immediately at the front.) |
| 113 | * |
| 114 | * pg_control_version identifies the format of pg_control itself. |
| 115 | * catalog_version_no identifies the format of the system catalogs. |
| 116 | * |
| 117 | * There are additional version identifiers in individual files; for |
| 118 | * example, WAL logs contain per-page magic numbers that can serve as |
| 119 | * version cues for the WAL log. |
| 120 | */ |
| 121 | uint32 pg_control_version; /* PG_CONTROL_VERSION */ |
| 122 | uint32 catalog_version_no; /* see catversion.h */ |
| 123 | |
| 124 | /* |
| 125 | * System status data |
| 126 | */ |
| 127 | DBState state; /* see enum above */ |
| 128 | pg_time_t time; /* time stamp of last pg_control update */ |
| 129 | XLogRecPtr checkPoint; /* last check point record ptr */ |
| 130 | |
| 131 | CheckPoint checkPointCopy; /* copy of last check point record */ |
| 132 | |
| 133 | XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */ |
| 134 | |
| 135 | /* |
| 136 | * These two values determine the minimum point we must recover up to |
| 137 | * before starting up: |
| 138 | * |
| 139 | * minRecoveryPoint is updated to the latest replayed LSN whenever we |
| 140 | * flush a data change during archive recovery. That guards against |
| 141 | * starting archive recovery, aborting it, and restarting with an earlier |
| 142 | * stop location. If we've already flushed data changes from WAL record X |
| 143 | * to disk, we mustn't start up until we reach X again. Zero when not |
| 144 | * doing archive recovery. |
| 145 | * |
| 146 | * backupStartPoint is the redo pointer of the backup start checkpoint, if |
| 147 | * we are recovering from an online backup and haven't reached the end of |
| 148 | * backup yet. It is reset to zero when the end of backup is reached, and |
| 149 | * we mustn't start up before that. A boolean would suffice otherwise, but |
| 150 | * we use the redo pointer as a cross-check when we see an end-of-backup |
| 151 | * record, to make sure the end-of-backup record corresponds the base |
| 152 | * backup we're recovering from. |
| 153 | * |
| 154 | * backupEndPoint is the backup end location, if we are recovering from an |
| 155 | * online backup which was taken from the standby and haven't reached the |
| 156 | * end of backup yet. It is initialized to the minimum recovery point in |
| 157 | * pg_control which was backed up last. It is reset to zero when the end |
| 158 | * of backup is reached, and we mustn't start up before that. |
| 159 | * |
| 160 | * If backupEndRequired is true, we know for sure that we're restoring |
| 161 | * from a backup, and must see a backup-end record before we can safely |
| 162 | * start up. If it's false, but backupStartPoint is set, a backup_label |
| 163 | * file was found at startup but it may have been a leftover from a stray |
| 164 | * pg_start_backup() call, not accompanied by pg_stop_backup(). |
| 165 | */ |
| 166 | XLogRecPtr minRecoveryPoint; |
| 167 | TimeLineID minRecoveryPointTLI; |
| 168 | XLogRecPtr backupStartPoint; |
| 169 | XLogRecPtr backupEndPoint; |
| 170 | bool backupEndRequired; |
| 171 | |
| 172 | /* |
| 173 | * Parameter settings that determine if the WAL can be used for archival |
| 174 | * or hot standby. |
| 175 | */ |
| 176 | int wal_level; |
| 177 | bool wal_log_hints; |
| 178 | int MaxConnections; |
| 179 | int max_worker_processes; |
| 180 | int max_wal_senders; |
| 181 | int max_prepared_xacts; |
| 182 | int max_locks_per_xact; |
| 183 | bool track_commit_timestamp; |
| 184 | |
| 185 | /* |
| 186 | * This data is used to check for hardware-architecture compatibility of |
| 187 | * the database and the backend executable. We need not check endianness |
| 188 | * explicitly, since the pg_control version will surely look wrong to a |
| 189 | * machine of different endianness, but we do need to worry about MAXALIGN |
| 190 | * and floating-point format. (Note: storage layout nominally also |
| 191 | * depends on SHORTALIGN and INTALIGN, but in practice these are the same |
| 192 | * on all architectures of interest.) |
| 193 | * |
| 194 | * Testing just one double value is not a very bulletproof test for |
| 195 | * floating-point compatibility, but it will catch most cases. |
| 196 | */ |
| 197 | uint32 maxAlign; /* alignment requirement for tuples */ |
| 198 | double floatFormat; /* constant 1234567.0 */ |
| 199 | #define FLOATFORMAT_VALUE 1234567.0 |
| 200 | |
| 201 | /* |
| 202 | * This data is used to make sure that configuration of this database is |
| 203 | * compatible with the backend executable. |
| 204 | */ |
| 205 | uint32 blcksz; /* data block size for this DB */ |
| 206 | uint32 relseg_size; /* blocks per segment of large relation */ |
| 207 | |
| 208 | uint32 xlog_blcksz; /* block size within WAL files */ |
| 209 | uint32 xlog_seg_size; /* size of each WAL segment */ |
| 210 | |
| 211 | uint32 nameDataLen; /* catalog name field width */ |
| 212 | uint32 indexMaxKeys; /* max number of columns in an index */ |
| 213 | |
| 214 | uint32 toast_max_chunk_size; /* chunk size in TOAST tables */ |
| 215 | uint32 loblksize; /* chunk size in pg_largeobject */ |
| 216 | |
| 217 | /* flags indicating pass-by-value status of various types */ |
| 218 | bool float4ByVal; /* float4 pass-by-value? */ |
| 219 | bool float8ByVal; /* float8, int8, etc pass-by-value? */ |
| 220 | |
| 221 | /* Are data pages protected by checksums? Zero if no checksum version */ |
| 222 | uint32 data_checksum_version; |
| 223 | |
| 224 | /* |
| 225 | * Random nonce, used in authentication requests that need to proceed |
| 226 | * based on values that are cluster-unique, like a SASL exchange that |
| 227 | * failed at an early stage. |
| 228 | */ |
| 229 | char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]; |
| 230 | |
| 231 | /* CRC of all above ... MUST BE LAST! */ |
| 232 | pg_crc32c crc; |
| 233 | } ControlFileData; |
| 234 | |
/*
 * Maximum safe value of sizeof(ControlFileData).  For reliability's sake,
 * it's critical that pg_control updates be atomic writes.  That generally
 * means the active data can't be more than one disk sector, which is 512
 * bytes on common hardware.  Be very careful about raising this limit.
 */
#define PG_CONTROL_MAX_SAFE_SIZE	512

/*
 * Physical size of the pg_control file.  Note that this is considerably
 * bigger than the actually used size (ie, sizeof(ControlFileData)).
 * The idea is to keep the physical size constant independent of format
 * changes, so that ReadControlFile will deliver a suitable wrong-version
 * message instead of a read error if it's looking at an incompatible file.
 */
#define PG_CONTROL_FILE_SIZE		8192
| 251 | |
| 252 | #endif /* PG_CONTROL_H */ |
| 253 | |