1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * pg_control.h |
4 | * The system control file "pg_control" is not a heap relation. |
5 | * However, we define it here so that the format is documented. |
6 | * |
7 | * |
8 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
9 | * Portions Copyright (c) 1994, Regents of the University of California |
10 | * |
11 | * src/include/catalog/pg_control.h |
12 | * |
13 | *------------------------------------------------------------------------- |
14 | */ |
15 | #ifndef PG_CONTROL_H |
16 | #define PG_CONTROL_H |
17 | |
18 | #include "access/transam.h" |
19 | #include "access/xlogdefs.h" |
20 | #include "pgtime.h" /* for pg_time_t */ |
21 | #include "port/pg_crc32c.h" |
22 | |
23 | |
24 | /* Version identifier for this pg_control format; recorded in ControlFileData.pg_control_version */ |
25 | #define PG_CONTROL_VERSION 1201 |
26 | |
27 | /* Length in bytes of ControlFileData.mock_authentication_nonce, see below */ |
28 | #define MOCK_AUTH_NONCE_LEN 32 |
29 | |
30 | /* |
31 | * Body of CheckPoint XLOG records. This is declared here because we keep |
32 | * a copy of the latest one in pg_control (ControlFileData.checkPointCopy) for possible disaster recovery. |
33 | * Because of that embedded copy, changing this struct requires a PG_CONTROL_VERSION bump. |
34 | */ |
35 | typedef struct CheckPoint |
36 | { |
37 | XLogRecPtr redo; /* next RecPtr available when we began to |
38 | * create CheckPoint (i.e. REDO start point) */ |
39 | TimeLineID ThisTimeLineID; /* current timeline ID (TLI) */ |
40 | TimeLineID PrevTimeLineID; /* previous TLI, if this record begins a new |
41 | * timeline (equals ThisTimeLineID otherwise) */ |
42 | bool fullPageWrites; /* current full_page_writes */ |
43 | FullTransactionId nextFullXid; /* next free full transaction ID */ |
44 | Oid nextOid; /* next free OID */ |
45 | MultiXactId nextMulti; /* next free MultiXactId */ |
46 | MultiXactOffset nextMultiOffset; /* next free MultiXact offset */ |
47 | TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */ |
48 | Oid oldestXidDB; /* database with minimum datfrozenxid */ |
49 | MultiXactId oldestMulti; /* cluster-wide minimum datminmxid */ |
50 | Oid oldestMultiDB; /* database with minimum datminmxid */ |
51 | pg_time_t time; /* time stamp of checkpoint */ |
52 | TransactionId oldestCommitTsXid; /* oldest Xid with valid commit |
53 | * timestamp */ |
54 | TransactionId newestCommitTsXid; /* newest Xid with valid commit |
55 | * timestamp */ |
56 | |
57 | /* |
58 | * Oldest XID still running. This is only needed to initialize hot standby |
59 | * mode from an online checkpoint, so we only bother calculating this for |
60 | * online checkpoints and only when wal_level is replica. In all other cases |
61 | * it is set to InvalidTransactionId. |
62 | */ |
63 | TransactionId oldestActiveXid; |
64 | } CheckPoint; |
65 | |
66 | /* XLOG info values for XLOG rmgr. Every value below is a multiple of 0x10, i.e. only bits 4-7 are used. NOTE(review): presumably the low four bits are reserved for generic WAL record flags -- confirm. */ |
67 | #define XLOG_CHECKPOINT_SHUTDOWN 0x00 |
68 | #define XLOG_CHECKPOINT_ONLINE 0x10 |
69 | #define XLOG_NOOP 0x20 |
70 | #define XLOG_NEXTOID 0x30 |
71 | #define XLOG_SWITCH 0x40 |
72 | #define XLOG_BACKUP_END 0x50 |
73 | #define XLOG_PARAMETER_CHANGE 0x60 |
74 | #define XLOG_RESTORE_POINT 0x70 |
75 | #define XLOG_FPW_CHANGE 0x80 |
76 | #define XLOG_END_OF_RECOVERY 0x90 |
77 | #define XLOG_FPI_FOR_HINT 0xA0 |
78 | #define XLOG_FPI 0xB0 |
79 | |
80 | |
81 | /* |
82 | * System status indicator. Note this is stored in pg_control (ControlFileData.state); if you change |
83 | * it, you must bump PG_CONTROL_VERSION. Members after DB_STARTUP take consecutive values in declaration order, so inserting or reordering members changes the stored numbers. |
84 | */ |
85 | typedef enum DBState |
86 | { |
87 | DB_STARTUP = 0, |
88 | DB_SHUTDOWNED, |
89 | DB_SHUTDOWNED_IN_RECOVERY, |
90 | DB_SHUTDOWNING, |
91 | DB_IN_CRASH_RECOVERY, |
92 | DB_IN_ARCHIVE_RECOVERY, |
93 | DB_IN_PRODUCTION |
94 | } DBState; |
95 | |
96 | /* |
97 | * Contents of pg_control. The crc field must stay the last member, and sizeof(ControlFileData) must not exceed PG_CONTROL_MAX_SAFE_SIZE. |
98 | */ |
99 | |
100 | typedef struct ControlFileData |
101 | { |
102 | /* |
103 | * Unique system identifier --- to ensure we match up xlog files with the |
104 | * installation that produced them. |
105 | */ |
106 | uint64 system_identifier; |
107 | |
108 | /* |
109 | * Version identifier information. Keep these fields at the same offset, |
110 | * especially pg_control_version; they won't be very useful if they move |
111 | * around. (For historical reasons they must be 8 bytes into the file |
112 | * rather than immediately at the front.) |
113 | * |
114 | * pg_control_version identifies the format of pg_control itself. |
115 | * catalog_version_no identifies the format of the system catalogs. |
116 | * |
117 | * There are additional version identifiers in individual files; for |
118 | * example, WAL logs contain per-page magic numbers that can serve as |
119 | * version cues for the WAL log. |
120 | */ |
121 | uint32 pg_control_version; /* PG_CONTROL_VERSION */ |
122 | uint32 catalog_version_no; /* see catversion.h */ |
123 | |
124 | /* |
125 | * System status data |
126 | */ |
127 | DBState state; /* see enum above */ |
128 | pg_time_t time; /* time stamp of last pg_control update */ |
129 | XLogRecPtr checkPoint; /* last check point record ptr */ |
130 | |
131 | CheckPoint checkPointCopy; /* copy of last check point record */ |
132 | |
133 | XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */ |
134 | |
135 | /* |
136 | * These values determine the minimum point we must recover up to |
137 | * before starting up: |
138 | * |
139 | * minRecoveryPoint is updated to the latest replayed LSN whenever we |
140 | * flush a data change during archive recovery. That guards against |
141 | * starting archive recovery, aborting it, and restarting with an earlier |
142 | * stop location. If we've already flushed data changes from WAL record X |
143 | * to disk, we mustn't start up until we reach X again. Zero when not |
144 | * doing archive recovery. |
145 | * |
146 | * backupStartPoint is the redo pointer of the backup start checkpoint, if |
147 | * we are recovering from an online backup and haven't reached the end of |
148 | * backup yet. It is reset to zero when the end of backup is reached, and |
149 | * we mustn't start up before that. A boolean would suffice otherwise, but |
150 | * we use the redo pointer as a cross-check when we see an end-of-backup |
151 | * record, to make sure the end-of-backup record corresponds to the base |
152 | * backup we're recovering from. |
153 | * |
154 | * backupEndPoint is the backup end location, if we are recovering from an |
155 | * online backup which was taken from the standby and haven't reached the |
156 | * end of backup yet. It is initialized to the minimum recovery point in |
157 | * pg_control which was backed up last. It is reset to zero when the end |
158 | * of backup is reached, and we mustn't start up before that. |
159 | * |
160 | * If backupEndRequired is true, we know for sure that we're restoring |
161 | * from a backup, and must see a backup-end record before we can safely |
162 | * start up. If it's false, but backupStartPoint is set, a backup_label |
163 | * file was found at startup but it may have been a leftover from a stray |
164 | * pg_start_backup() call, not accompanied by pg_stop_backup(). |
165 | */ |
166 | XLogRecPtr minRecoveryPoint; |
167 | TimeLineID minRecoveryPointTLI; /* NOTE(review): presumably the timeline that minRecoveryPoint belongs to -- confirm */ |
168 | XLogRecPtr backupStartPoint; |
169 | XLogRecPtr backupEndPoint; |
170 | bool backupEndRequired; |
171 | |
172 | /* |
173 | * Parameter settings that determine if the WAL can be used for archival |
174 | * or hot standby. |
175 | */ |
176 | int wal_level; |
177 | bool wal_log_hints; |
178 | int MaxConnections; |
179 | int max_worker_processes; |
180 | int max_wal_senders; |
181 | int max_prepared_xacts; |
182 | int max_locks_per_xact; |
183 | bool track_commit_timestamp; |
184 | |
185 | /* |
186 | * This data is used to check for hardware-architecture compatibility of |
187 | * the database and the backend executable. We need not check endianness |
188 | * explicitly, since the pg_control version will surely look wrong to a |
189 | * machine of different endianness, but we do need to worry about MAXALIGN |
190 | * and floating-point format. (Note: storage layout nominally also |
191 | * depends on SHORTALIGN and INTALIGN, but in practice these are the same |
192 | * on all architectures of interest.) |
193 | * |
194 | * Testing just one double value is not a very bulletproof test for |
195 | * floating-point compatibility, but it will catch most cases. |
196 | */ |
197 | uint32 maxAlign; /* alignment requirement for tuples */ |
198 | double floatFormat; /* constant 1234567.0 */ |
199 | #define FLOATFORMAT_VALUE 1234567.0 /* the constant floatFormat is documented to hold */ |
200 | |
201 | /* |
202 | * This data is used to make sure that configuration of this database is |
203 | * compatible with the backend executable. |
204 | */ |
205 | uint32 blcksz; /* data block size for this DB */ |
206 | uint32 relseg_size; /* blocks per segment of large relation */ |
207 | |
208 | uint32 xlog_blcksz; /* block size within WAL files */ |
209 | uint32 xlog_seg_size; /* size of each WAL segment */ |
210 | |
211 | uint32 nameDataLen; /* catalog name field width */ |
212 | uint32 indexMaxKeys; /* max number of columns in an index */ |
213 | |
214 | uint32 toast_max_chunk_size; /* chunk size in TOAST tables */ |
215 | uint32 loblksize; /* chunk size in pg_largeobject */ |
216 | |
217 | /* flags indicating pass-by-value status of various types */ |
218 | bool float4ByVal; /* float4 pass-by-value? */ |
219 | bool float8ByVal; /* float8, int8, etc pass-by-value? */ |
220 | |
221 | /* Checksum version protecting data pages; zero means no checksum version, i.e. pages are not protected */ |
222 | uint32 data_checksum_version; |
223 | |
224 | /* |
225 | * Random nonce, used in authentication requests that need to proceed |
226 | * based on values that are cluster-unique, like a SASL exchange that |
227 | * failed at an early stage. |
228 | */ |
229 | char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]; |
230 | |
231 | /* CRC of all above ... MUST BE LAST! */ |
232 | pg_crc32c crc; |
233 | } ControlFileData; |
234 | |
235 | /* |
236 | * Maximum safe value of sizeof(ControlFileData). For reliability's sake, |
237 | * it's critical that pg_control updates be atomic writes. That generally |
238 | * means the active data can't be more than one disk sector, which is 512 |
239 | * bytes on common hardware. Be very careful about raising this limit. |
240 | */ |
241 | #define PG_CONTROL_MAX_SAFE_SIZE 512 |
242 | |
243 | /* |
244 | * Physical size of the pg_control file. Note that this is considerably |
245 | * bigger than the actually used size (i.e., sizeof(ControlFileData)). |
246 | * The idea is to keep the physical size constant independent of format |
247 | * changes, so that ReadControlFile will deliver a suitable wrong-version |
248 | * message instead of a read error if it's looking at an incompatible file. |
249 | */ |
250 | #define PG_CONTROL_FILE_SIZE 8192 |
251 | |
252 | #endif /* PG_CONTROL_H */ |
253 | |