1/*-------------------------------------------------------------------------
2 *
3 * timeline.c
4 * Functions for reading and writing timeline history files.
5 *
6 * A timeline history file lists the timeline changes of the timeline, in
7 * a simple text format. They are archived along with the WAL segments.
8 *
9 * The files are named like "<tli>.history". For example, if the database
10 * starts up and switches to timeline 5, the timeline history file would be
11 * called "00000005.history".
12 *
13 * Each line in the file represents a timeline switch:
14 *
15 * <parentTLI> <switchpoint> <reason>
16 *
17 * parentTLI ID of the parent timeline
18 * switchpoint XLogRecPtr of the WAL location where the switch happened
19 * reason human-readable explanation of why the timeline was changed
20 *
21 * The fields are separated by tabs. Lines beginning with # are comments, and
22 * are ignored. Empty lines are also ignored.
23 *
24 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
25 * Portions Copyright (c) 1994, Regents of the University of California
26 *
27 * src/backend/access/transam/timeline.c
28 *
29 *-------------------------------------------------------------------------
30 */
31
32#include "postgres.h"
33
34#include <sys/stat.h>
35#include <unistd.h>
36
37#include "access/timeline.h"
38#include "access/xlog.h"
39#include "access/xlog_internal.h"
40#include "access/xlogdefs.h"
41#include "pgstat.h"
42#include "storage/fd.h"
43
44/*
45 * Copies all timeline history files with id's between 'begin' and 'end'
46 * from archive to pg_wal.
47 */
48void
49restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
50{
51 char path[MAXPGPATH];
52 char histfname[MAXFNAMELEN];
53 TimeLineID tli;
54
55 for (tli = begin; tli < end; tli++)
56 {
57 if (tli == 1)
58 continue;
59
60 TLHistoryFileName(histfname, tli);
61 if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false))
62 KeepFileRestoredFromArchive(path, histfname);
63 }
64}
65
66/*
67 * Try to read a timeline's history file.
68 *
69 * If successful, return the list of component TLIs (the given TLI followed by
70 * its ancestor TLIs). If we can't find the history file, assume that the
71 * timeline has no parents, and return a list of just the specified timeline
72 * ID.
73 */
74List *
75readTimeLineHistory(TimeLineID targetTLI)
76{
77 List *result;
78 char path[MAXPGPATH];
79 char histfname[MAXFNAMELEN];
80 char fline[MAXPGPATH];
81 FILE *fd;
82 TimeLineHistoryEntry *entry;
83 TimeLineID lasttli = 0;
84 XLogRecPtr prevend;
85 bool fromArchive = false;
86
87 /* Timeline 1 does not have a history file, so no need to check */
88 if (targetTLI == 1)
89 {
90 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
91 entry->tli = targetTLI;
92 entry->begin = entry->end = InvalidXLogRecPtr;
93 return list_make1(entry);
94 }
95
96 if (ArchiveRecoveryRequested)
97 {
98 TLHistoryFileName(histfname, targetTLI);
99 fromArchive =
100 RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
101 }
102 else
103 TLHistoryFilePath(path, targetTLI);
104
105 fd = AllocateFile(path, "r");
106 if (fd == NULL)
107 {
108 if (errno != ENOENT)
109 ereport(FATAL,
110 (errcode_for_file_access(),
111 errmsg("could not open file \"%s\": %m", path)));
112 /* Not there, so assume no parents */
113 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
114 entry->tli = targetTLI;
115 entry->begin = entry->end = InvalidXLogRecPtr;
116 return list_make1(entry);
117 }
118
119 result = NIL;
120
121 /*
122 * Parse the file...
123 */
124 prevend = InvalidXLogRecPtr;
125 while (fgets(fline, sizeof(fline), fd) != NULL)
126 {
127 /* skip leading whitespace and check for # comment */
128 char *ptr;
129 TimeLineID tli;
130 uint32 switchpoint_hi;
131 uint32 switchpoint_lo;
132 int nfields;
133
134 for (ptr = fline; *ptr; ptr++)
135 {
136 if (!isspace((unsigned char) *ptr))
137 break;
138 }
139 if (*ptr == '\0' || *ptr == '#')
140 continue;
141
142 nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
143
144 if (nfields < 1)
145 {
146 /* expect a numeric timeline ID as first field of line */
147 ereport(FATAL,
148 (errmsg("syntax error in history file: %s", fline),
149 errhint("Expected a numeric timeline ID.")));
150 }
151 if (nfields != 3)
152 ereport(FATAL,
153 (errmsg("syntax error in history file: %s", fline),
154 errhint("Expected a write-ahead log switchpoint location.")));
155
156 if (result && tli <= lasttli)
157 ereport(FATAL,
158 (errmsg("invalid data in history file: %s", fline),
159 errhint("Timeline IDs must be in increasing sequence.")));
160
161 lasttli = tli;
162
163 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
164 entry->tli = tli;
165 entry->begin = prevend;
166 entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
167 prevend = entry->end;
168
169 /* Build list with newest item first */
170 result = lcons(entry, result);
171
172 /* we ignore the remainder of each line */
173 }
174
175 FreeFile(fd);
176
177 if (result && targetTLI <= lasttli)
178 ereport(FATAL,
179 (errmsg("invalid data in history file \"%s\"", path),
180 errhint("Timeline IDs must be less than child timeline's ID.")));
181
182 /*
183 * Create one more entry for the "tip" of the timeline, which has no entry
184 * in the history file.
185 */
186 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
187 entry->tli = targetTLI;
188 entry->begin = prevend;
189 entry->end = InvalidXLogRecPtr;
190
191 result = lcons(entry, result);
192
193 /*
194 * If the history file was fetched from archive, save it in pg_wal for
195 * future reference.
196 */
197 if (fromArchive)
198 KeepFileRestoredFromArchive(path, histfname);
199
200 return result;
201}
202
203/*
204 * Probe whether a timeline history file exists for the given timeline ID
205 */
206bool
207existsTimeLineHistory(TimeLineID probeTLI)
208{
209 char path[MAXPGPATH];
210 char histfname[MAXFNAMELEN];
211 FILE *fd;
212
213 /* Timeline 1 does not have a history file, so no need to check */
214 if (probeTLI == 1)
215 return false;
216
217 if (ArchiveRecoveryRequested)
218 {
219 TLHistoryFileName(histfname, probeTLI);
220 RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
221 }
222 else
223 TLHistoryFilePath(path, probeTLI);
224
225 fd = AllocateFile(path, "r");
226 if (fd != NULL)
227 {
228 FreeFile(fd);
229 return true;
230 }
231 else
232 {
233 if (errno != ENOENT)
234 ereport(FATAL,
235 (errcode_for_file_access(),
236 errmsg("could not open file \"%s\": %m", path)));
237 return false;
238 }
239}
240
241/*
242 * Find the newest existing timeline, assuming that startTLI exists.
243 *
244 * Note: while this is somewhat heuristic, it does positively guarantee
245 * that (result + 1) is not a known timeline, and therefore it should
246 * be safe to assign that ID to a new timeline.
247 */
248TimeLineID
249findNewestTimeLine(TimeLineID startTLI)
250{
251 TimeLineID newestTLI;
252 TimeLineID probeTLI;
253
254 /*
255 * The algorithm is just to probe for the existence of timeline history
256 * files. XXX is it useful to allow gaps in the sequence?
257 */
258 newestTLI = startTLI;
259
260 for (probeTLI = startTLI + 1;; probeTLI++)
261 {
262 if (existsTimeLineHistory(probeTLI))
263 {
264 newestTLI = probeTLI; /* probeTLI exists */
265 }
266 else
267 {
268 /* doesn't exist, assume we're done */
269 break;
270 }
271 }
272
273 return newestTLI;
274}
275
276/*
277 * Create a new timeline history file.
278 *
279 * newTLI: ID of the new timeline
280 * parentTLI: ID of its immediate parent
281 * switchpoint: WAL location where the system switched to the new timeline
282 * reason: human-readable explanation of why the timeline was switched
283 *
284 * Currently this is only used at the end recovery, and so there are no locking
285 * considerations. But we should be just as tense as XLogFileInit to avoid
286 * emplacing a bogus file.
287 */
288void
289writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
290 XLogRecPtr switchpoint, char *reason)
291{
292 char path[MAXPGPATH];
293 char tmppath[MAXPGPATH];
294 char histfname[MAXFNAMELEN];
295 char buffer[BLCKSZ];
296 int srcfd;
297 int fd;
298 int nbytes;
299
300 Assert(newTLI > parentTLI); /* else bad selection of newTLI */
301
302 /*
303 * Write into a temp file name.
304 */
305 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
306
307 unlink(tmppath);
308
309 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
310 fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
311 if (fd < 0)
312 ereport(ERROR,
313 (errcode_for_file_access(),
314 errmsg("could not create file \"%s\": %m", tmppath)));
315
316 /*
317 * If a history file exists for the parent, copy it verbatim
318 */
319 if (ArchiveRecoveryRequested)
320 {
321 TLHistoryFileName(histfname, parentTLI);
322 RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
323 }
324 else
325 TLHistoryFilePath(path, parentTLI);
326
327 srcfd = OpenTransientFile(path, O_RDONLY);
328 if (srcfd < 0)
329 {
330 if (errno != ENOENT)
331 ereport(ERROR,
332 (errcode_for_file_access(),
333 errmsg("could not open file \"%s\": %m", path)));
334 /* Not there, so assume parent has no parents */
335 }
336 else
337 {
338 for (;;)
339 {
340 errno = 0;
341 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
342 nbytes = (int) read(srcfd, buffer, sizeof(buffer));
343 pgstat_report_wait_end();
344 if (nbytes < 0 || errno != 0)
345 ereport(ERROR,
346 (errcode_for_file_access(),
347 errmsg("could not read file \"%s\": %m", path)));
348 if (nbytes == 0)
349 break;
350 errno = 0;
351 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
352 if ((int) write(fd, buffer, nbytes) != nbytes)
353 {
354 int save_errno = errno;
355
356 /*
357 * If we fail to make the file, delete it to release disk
358 * space
359 */
360 unlink(tmppath);
361
362 /*
363 * if write didn't set errno, assume problem is no disk space
364 */
365 errno = save_errno ? save_errno : ENOSPC;
366
367 ereport(ERROR,
368 (errcode_for_file_access(),
369 errmsg("could not write to file \"%s\": %m", tmppath)));
370 }
371 pgstat_report_wait_end();
372 }
373
374 if (CloseTransientFile(srcfd))
375 ereport(ERROR,
376 (errcode_for_file_access(),
377 errmsg("could not close file \"%s\": %m", path)));
378 }
379
380 /*
381 * Append one line with the details of this timeline split.
382 *
383 * If we did have a parent file, insert an extra newline just in case the
384 * parent file failed to end with one.
385 */
386 snprintf(buffer, sizeof(buffer),
387 "%s%u\t%X/%X\t%s\n",
388 (srcfd < 0) ? "" : "\n",
389 parentTLI,
390 (uint32) (switchpoint >> 32), (uint32) (switchpoint),
391 reason);
392
393 nbytes = strlen(buffer);
394 errno = 0;
395 if ((int) write(fd, buffer, nbytes) != nbytes)
396 {
397 int save_errno = errno;
398
399 /*
400 * If we fail to make the file, delete it to release disk space
401 */
402 unlink(tmppath);
403 /* if write didn't set errno, assume problem is no disk space */
404 errno = save_errno ? save_errno : ENOSPC;
405
406 ereport(ERROR,
407 (errcode_for_file_access(),
408 errmsg("could not write to file \"%s\": %m", tmppath)));
409 }
410
411 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
412 if (pg_fsync(fd) != 0)
413 ereport(data_sync_elevel(ERROR),
414 (errcode_for_file_access(),
415 errmsg("could not fsync file \"%s\": %m", tmppath)));
416 pgstat_report_wait_end();
417
418 if (CloseTransientFile(fd))
419 ereport(ERROR,
420 (errcode_for_file_access(),
421 errmsg("could not close file \"%s\": %m", tmppath)));
422
423 /*
424 * Now move the completed history file into place with its final name.
425 */
426 TLHistoryFilePath(path, newTLI);
427
428 /*
429 * Perform the rename using link if available, paranoidly trying to avoid
430 * overwriting an existing file (there shouldn't be one).
431 */
432 durable_link_or_rename(tmppath, path, ERROR);
433
434 /* The history file can be archived immediately. */
435 if (XLogArchivingActive())
436 {
437 TLHistoryFileName(histfname, newTLI);
438 XLogArchiveNotify(histfname);
439 }
440}
441
442/*
443 * Writes a history file for given timeline and contents.
444 *
445 * Currently this is only used in the walreceiver process, and so there are
446 * no locking considerations. But we should be just as tense as XLogFileInit
447 * to avoid emplacing a bogus file.
448 */
449void
450writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
451{
452 char path[MAXPGPATH];
453 char tmppath[MAXPGPATH];
454 int fd;
455
456 /*
457 * Write into a temp file name.
458 */
459 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
460
461 unlink(tmppath);
462
463 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
464 fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
465 if (fd < 0)
466 ereport(ERROR,
467 (errcode_for_file_access(),
468 errmsg("could not create file \"%s\": %m", tmppath)));
469
470 errno = 0;
471 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE);
472 if ((int) write(fd, content, size) != size)
473 {
474 int save_errno = errno;
475
476 /*
477 * If we fail to make the file, delete it to release disk space
478 */
479 unlink(tmppath);
480 /* if write didn't set errno, assume problem is no disk space */
481 errno = save_errno ? save_errno : ENOSPC;
482
483 ereport(ERROR,
484 (errcode_for_file_access(),
485 errmsg("could not write to file \"%s\": %m", tmppath)));
486 }
487 pgstat_report_wait_end();
488
489 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
490 if (pg_fsync(fd) != 0)
491 ereport(data_sync_elevel(ERROR),
492 (errcode_for_file_access(),
493 errmsg("could not fsync file \"%s\": %m", tmppath)));
494 pgstat_report_wait_end();
495
496 if (CloseTransientFile(fd))
497 ereport(ERROR,
498 (errcode_for_file_access(),
499 errmsg("could not close file \"%s\": %m", tmppath)));
500
501 /*
502 * Now move the completed history file into place with its final name.
503 */
504 TLHistoryFilePath(path, tli);
505
506 /*
507 * Perform the rename using link if available, paranoidly trying to avoid
508 * overwriting an existing file (there shouldn't be one).
509 */
510 durable_link_or_rename(tmppath, path, ERROR);
511}
512
513/*
514 * Returns true if 'expectedTLEs' contains a timeline with id 'tli'
515 */
516bool
517tliInHistory(TimeLineID tli, List *expectedTLEs)
518{
519 ListCell *cell;
520
521 foreach(cell, expectedTLEs)
522 {
523 if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
524 return true;
525 }
526
527 return false;
528}
529
530/*
531 * Returns the ID of the timeline in use at a particular point in time, in
532 * the given timeline history.
533 */
534TimeLineID
535tliOfPointInHistory(XLogRecPtr ptr, List *history)
536{
537 ListCell *cell;
538
539 foreach(cell, history)
540 {
541 TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
542
543 if ((XLogRecPtrIsInvalid(tle->begin) || tle->begin <= ptr) &&
544 (XLogRecPtrIsInvalid(tle->end) || ptr < tle->end))
545 {
546 /* found it */
547 return tle->tli;
548 }
549 }
550
551 /* shouldn't happen. */
552 elog(ERROR, "timeline history was not contiguous");
553 return 0; /* keep compiler quiet */
554}
555
556/*
557 * Returns the point in history where we branched off the given timeline,
558 * and the timeline we branched to (*nextTLI). Returns InvalidXLogRecPtr if
559 * the timeline is current, ie. we have not branched off from it, and throws
560 * an error if the timeline is not part of this server's history.
561 */
562XLogRecPtr
563tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
564{
565 ListCell *cell;
566
567 if (nextTLI)
568 *nextTLI = 0;
569 foreach(cell, history)
570 {
571 TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
572
573 if (tle->tli == tli)
574 return tle->end;
575 if (nextTLI)
576 *nextTLI = tle->tli;
577 }
578
579 ereport(ERROR,
580 (errmsg("requested timeline %u is not in this server's history",
581 tli)));
582 return InvalidXLogRecPtr; /* keep compiler quiet */
583}
584