/*-------------------------------------------------------------------------
 *
 * pg_control.h
 *	  The system control file "pg_control" is not a heap relation.
 *	  However, we define it here so that the format is documented.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/catalog/pg_control.h
 *
 *-------------------------------------------------------------------------
 */
15#ifndef PG_CONTROL_H
16#define PG_CONTROL_H
17
18#include "access/transam.h"
19#include "access/xlogdefs.h"
20#include "pgtime.h" /* for pg_time_t */
21#include "port/pg_crc32c.h"
22
23
/*
 * Version identifier for this pg_control format.  It must be bumped whenever
 * the CheckPoint struct or the DBState enum changes, since both are stored
 * in pg_control.
 */
#define PG_CONTROL_VERSION	1201
26
/*
 * Length, in bytes, of the mock_authentication_nonce array stored in
 * ControlFileData.
 */
#define MOCK_AUTH_NONCE_LEN		32
29
30/*
31 * Body of CheckPoint XLOG records. This is declared here because we keep
32 * a copy of the latest one in pg_control for possible disaster recovery.
33 * Changing this struct requires a PG_CONTROL_VERSION bump.
34 */
35typedef struct CheckPoint
36{
37 XLogRecPtr redo; /* next RecPtr available when we began to
38 * create CheckPoint (i.e. REDO start point) */
39 TimeLineID ThisTimeLineID; /* current TLI */
40 TimeLineID PrevTimeLineID; /* previous TLI, if this record begins a new
41 * timeline (equals ThisTimeLineID otherwise) */
42 bool fullPageWrites; /* current full_page_writes */
43 FullTransactionId nextFullXid; /* next free full transaction ID */
44 Oid nextOid; /* next free OID */
45 MultiXactId nextMulti; /* next free MultiXactId */
46 MultiXactOffset nextMultiOffset; /* next free MultiXact offset */
47 TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */
48 Oid oldestXidDB; /* database with minimum datfrozenxid */
49 MultiXactId oldestMulti; /* cluster-wide minimum datminmxid */
50 Oid oldestMultiDB; /* database with minimum datminmxid */
51 pg_time_t time; /* time stamp of checkpoint */
52 TransactionId oldestCommitTsXid; /* oldest Xid with valid commit
53 * timestamp */
54 TransactionId newestCommitTsXid; /* newest Xid with valid commit
55 * timestamp */
56
57 /*
58 * Oldest XID still running. This is only needed to initialize hot standby
59 * mode from an online checkpoint, so we only bother calculating this for
60 * online checkpoints and only when wal_level is replica. Otherwise it's
61 * set to InvalidTransactionId.
62 */
63 TransactionId oldestActiveXid;
64} CheckPoint;
65
/*
 * XLOG info values for XLOG rmgr.
 *
 * Each code is a distinct multiple of 0x10, i.e. only the upper four bits
 * of the byte vary between record types; keep new codes in that pattern.
 */
#define XLOG_CHECKPOINT_SHUTDOWN		0x00
#define XLOG_CHECKPOINT_ONLINE			0x10
#define XLOG_NOOP						0x20
#define XLOG_NEXTOID					0x30
#define XLOG_SWITCH						0x40
#define XLOG_BACKUP_END					0x50
#define XLOG_PARAMETER_CHANGE			0x60
#define XLOG_RESTORE_POINT				0x70
#define XLOG_FPW_CHANGE					0x80
#define XLOG_END_OF_RECOVERY			0x90
#define XLOG_FPI_FOR_HINT				0xA0
#define XLOG_FPI						0xB0
79
80
/*
 * System status indicator.  Note this is stored in pg_control; if you change
 * it, you must bump PG_CONTROL_VERSION.
 *
 * The numeric value of each state is spelled out explicitly because that
 * value is what gets stored: inserting a new state in the middle of the list
 * must not silently renumber the ones after it.
 */
typedef enum DBState
{
	DB_STARTUP = 0,
	DB_SHUTDOWNED = 1,
	DB_SHUTDOWNED_IN_RECOVERY = 2,
	DB_SHUTDOWNING = 3,
	DB_IN_CRASH_RECOVERY = 4,
	DB_IN_ARCHIVE_RECOVERY = 5,
	DB_IN_PRODUCTION = 6
} DBState;
95
96/*
97 * Contents of pg_control.
98 */
99
100typedef struct ControlFileData
101{
102 /*
103 * Unique system identifier --- to ensure we match up xlog files with the
104 * installation that produced them.
105 */
106 uint64 system_identifier;
107
108 /*
109 * Version identifier information. Keep these fields at the same offset,
110 * especially pg_control_version; they won't be real useful if they move
111 * around. (For historical reasons they must be 8 bytes into the file
112 * rather than immediately at the front.)
113 *
114 * pg_control_version identifies the format of pg_control itself.
115 * catalog_version_no identifies the format of the system catalogs.
116 *
117 * There are additional version identifiers in individual files; for
118 * example, WAL logs contain per-page magic numbers that can serve as
119 * version cues for the WAL log.
120 */
121 uint32 pg_control_version; /* PG_CONTROL_VERSION */
122 uint32 catalog_version_no; /* see catversion.h */
123
124 /*
125 * System status data
126 */
127 DBState state; /* see enum above */
128 pg_time_t time; /* time stamp of last pg_control update */
129 XLogRecPtr checkPoint; /* last check point record ptr */
130
131 CheckPoint checkPointCopy; /* copy of last check point record */
132
133 XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */
134
135 /*
136 * These two values determine the minimum point we must recover up to
137 * before starting up:
138 *
139 * minRecoveryPoint is updated to the latest replayed LSN whenever we
140 * flush a data change during archive recovery. That guards against
141 * starting archive recovery, aborting it, and restarting with an earlier
142 * stop location. If we've already flushed data changes from WAL record X
143 * to disk, we mustn't start up until we reach X again. Zero when not
144 * doing archive recovery.
145 *
146 * backupStartPoint is the redo pointer of the backup start checkpoint, if
147 * we are recovering from an online backup and haven't reached the end of
148 * backup yet. It is reset to zero when the end of backup is reached, and
149 * we mustn't start up before that. A boolean would suffice otherwise, but
150 * we use the redo pointer as a cross-check when we see an end-of-backup
151 * record, to make sure the end-of-backup record corresponds the base
152 * backup we're recovering from.
153 *
154 * backupEndPoint is the backup end location, if we are recovering from an
155 * online backup which was taken from the standby and haven't reached the
156 * end of backup yet. It is initialized to the minimum recovery point in
157 * pg_control which was backed up last. It is reset to zero when the end
158 * of backup is reached, and we mustn't start up before that.
159 *
160 * If backupEndRequired is true, we know for sure that we're restoring
161 * from a backup, and must see a backup-end record before we can safely
162 * start up. If it's false, but backupStartPoint is set, a backup_label
163 * file was found at startup but it may have been a leftover from a stray
164 * pg_start_backup() call, not accompanied by pg_stop_backup().
165 */
166 XLogRecPtr minRecoveryPoint;
167 TimeLineID minRecoveryPointTLI;
168 XLogRecPtr backupStartPoint;
169 XLogRecPtr backupEndPoint;
170 bool backupEndRequired;
171
172 /*
173 * Parameter settings that determine if the WAL can be used for archival
174 * or hot standby.
175 */
176 int wal_level;
177 bool wal_log_hints;
178 int MaxConnections;
179 int max_worker_processes;
180 int max_wal_senders;
181 int max_prepared_xacts;
182 int max_locks_per_xact;
183 bool track_commit_timestamp;
184
185 /*
186 * This data is used to check for hardware-architecture compatibility of
187 * the database and the backend executable. We need not check endianness
188 * explicitly, since the pg_control version will surely look wrong to a
189 * machine of different endianness, but we do need to worry about MAXALIGN
190 * and floating-point format. (Note: storage layout nominally also
191 * depends on SHORTALIGN and INTALIGN, but in practice these are the same
192 * on all architectures of interest.)
193 *
194 * Testing just one double value is not a very bulletproof test for
195 * floating-point compatibility, but it will catch most cases.
196 */
197 uint32 maxAlign; /* alignment requirement for tuples */
198 double floatFormat; /* constant 1234567.0 */
199#define FLOATFORMAT_VALUE 1234567.0
200
201 /*
202 * This data is used to make sure that configuration of this database is
203 * compatible with the backend executable.
204 */
205 uint32 blcksz; /* data block size for this DB */
206 uint32 relseg_size; /* blocks per segment of large relation */
207
208 uint32 xlog_blcksz; /* block size within WAL files */
209 uint32 xlog_seg_size; /* size of each WAL segment */
210
211 uint32 nameDataLen; /* catalog name field width */
212 uint32 indexMaxKeys; /* max number of columns in an index */
213
214 uint32 toast_max_chunk_size; /* chunk size in TOAST tables */
215 uint32 loblksize; /* chunk size in pg_largeobject */
216
217 /* flags indicating pass-by-value status of various types */
218 bool float4ByVal; /* float4 pass-by-value? */
219 bool float8ByVal; /* float8, int8, etc pass-by-value? */
220
221 /* Are data pages protected by checksums? Zero if no checksum version */
222 uint32 data_checksum_version;
223
224 /*
225 * Random nonce, used in authentication requests that need to proceed
226 * based on values that are cluster-unique, like a SASL exchange that
227 * failed at an early stage.
228 */
229 char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN];
230
231 /* CRC of all above ... MUST BE LAST! */
232 pg_crc32c crc;
233} ControlFileData;
234
/*
 * Maximum safe value of sizeof(ControlFileData).  For reliability's sake,
 * it's critical that pg_control updates be atomic writes.  That generally
 * means the active data can't be more than one disk sector, which is 512
 * bytes on common hardware.  Be very careful about raising this limit.
 */
#define PG_CONTROL_MAX_SAFE_SIZE	512
242
/*
 * Physical size of the pg_control file, in bytes.  Note that this is
 * considerably bigger than the actually used size (ie,
 * sizeof(ControlFileData)).  The idea is to keep the physical size constant
 * independent of format changes, so that ReadControlFile will deliver a
 * suitable wrong-version message instead of a read error if it's looking at
 * an incompatible file.
 */
#define PG_CONTROL_FILE_SIZE		8192
251
252#endif /* PG_CONTROL_H */
253