| 1 | /*------------------------------------------------------------------------- | 
|---|
| 2 | * | 
|---|
| 3 | * receivelog.c - receive WAL files using the streaming | 
|---|
| 4 | *				  replication protocol. | 
|---|
| 5 | * | 
|---|
| 6 | * Author: Magnus Hagander <magnus@hagander.net> | 
|---|
| 7 | * | 
|---|
| 8 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group | 
|---|
| 9 | * | 
|---|
| 10 | * IDENTIFICATION | 
|---|
| 11 | *		  src/bin/pg_basebackup/receivelog.c | 
|---|
| 12 | *------------------------------------------------------------------------- | 
|---|
| 13 | */ | 
|---|
| 14 |  | 
|---|
| 15 | #include "postgres_fe.h" | 
|---|
| 16 |  | 
|---|
| 17 | #include <sys/stat.h> | 
|---|
| 18 | #include <unistd.h> | 
|---|
| 19 | #ifdef HAVE_SYS_SELECT_H | 
|---|
| 20 | #include <sys/select.h> | 
|---|
| 21 | #endif | 
|---|
| 22 |  | 
|---|
| 23 | /* local includes */ | 
|---|
| 24 | #include "receivelog.h" | 
|---|
| 25 | #include "streamutil.h" | 
|---|
| 26 |  | 
|---|
| 27 | #include "libpq-fe.h" | 
|---|
| 28 | #include "access/xlog_internal.h" | 
|---|
| 29 | #include "common/file_utils.h" | 
|---|
| 30 | #include "common/logging.h" | 
|---|
| 31 |  | 
|---|
| 32 |  | 
|---|
| 33 | /* fd and filename for currently open WAL file */ | 
|---|
| 34 | static Walfile *walfile = NULL; | 
|---|
| 35 | static char current_walfile_name[MAXPGPATH] = ""; | 
|---|
| 36 | static bool reportFlushPosition = false; | 
|---|
| 37 | static XLogRecPtr lastFlushPosition = InvalidXLogRecPtr; | 
|---|
| 38 |  | 
|---|
| 39 | static bool still_sending = true;	/* feedback still needs to be sent? */ | 
|---|
| 40 |  | 
|---|
| 41 | static PGresult *HandleCopyStream(PGconn *conn, StreamCtl *stream, | 
|---|
| 42 | XLogRecPtr *stoppos); | 
|---|
| 43 | static int	CopyStreamPoll(PGconn *conn, long timeout_ms, pgsocket stop_socket); | 
|---|
| 44 | static int	CopyStreamReceive(PGconn *conn, long timeout, pgsocket stop_socket, | 
|---|
| 45 | char **buffer); | 
|---|
| 46 | static bool ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf, | 
|---|
| 47 | int len, XLogRecPtr blockpos, TimestampTz *last_status); | 
|---|
| 48 | static bool ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, | 
|---|
| 49 | XLogRecPtr *blockpos); | 
|---|
| 50 | static PGresult *HandleEndOfCopyStream(PGconn *conn, StreamCtl *stream, char *copybuf, | 
|---|
| 51 | XLogRecPtr blockpos, XLogRecPtr *stoppos); | 
|---|
| 52 | static bool CheckCopyStreamStop(PGconn *conn, StreamCtl *stream, XLogRecPtr blockpos, | 
|---|
| 53 | XLogRecPtr *stoppos); | 
|---|
| 54 | static long CalculateCopyStreamSleeptime(TimestampTz now, int standby_message_timeout, | 
|---|
| 55 | TimestampTz last_status); | 
|---|
| 56 |  | 
|---|
| 57 | static bool ReadEndOfStreamingResult(PGresult *res, XLogRecPtr *startpos, | 
|---|
| 58 | uint32 *timeline); | 
|---|
| 59 |  | 
|---|
| 60 | static bool | 
|---|
| 61 | mark_file_as_archived(StreamCtl *stream, const char *fname) | 
|---|
| 62 | { | 
|---|
| 63 | Walfile    *f; | 
|---|
| 64 | static char tmppath[MAXPGPATH]; | 
|---|
| 65 |  | 
|---|
| 66 | snprintf(tmppath, sizeof(tmppath), "archive_status/%s.done", | 
|---|
| 67 | fname); | 
|---|
| 68 |  | 
|---|
| 69 | f = stream->walmethod->open_for_write(tmppath, NULL, 0); | 
|---|
| 70 | if (f == NULL) | 
|---|
| 71 | { | 
|---|
| 72 | pg_log_error( "could not create archive status file \"%s\": %s", | 
|---|
| 73 | tmppath, stream->walmethod->getlasterror()); | 
|---|
| 74 | return false; | 
|---|
| 75 | } | 
|---|
| 76 |  | 
|---|
| 77 | stream->walmethod->close(f, CLOSE_NORMAL); | 
|---|
| 78 |  | 
|---|
| 79 | return true; | 
|---|
| 80 | } | 
|---|
| 81 |  | 
|---|
| 82 | /* | 
|---|
| 83 | * Open a new WAL file in the specified directory. | 
|---|
| 84 | * | 
|---|
| 85 | * Returns true if OK; on failure, returns false after printing an error msg. | 
|---|
| 86 | * On success, 'walfile' is set to the FD for the file, and the base filename | 
|---|
| 87 | * (without partial_suffix) is stored in 'current_walfile_name'. | 
|---|
| 88 | * | 
|---|
| 89 | * The file will be padded to 16Mb with zeroes. | 
|---|
| 90 | */ | 
|---|
| 91 | static bool | 
|---|
| 92 | open_walfile(StreamCtl *stream, XLogRecPtr startpoint) | 
|---|
| 93 | { | 
|---|
| 94 | Walfile    *f; | 
|---|
| 95 | char		fn[MAXPGPATH]; | 
|---|
| 96 | ssize_t		size; | 
|---|
| 97 | XLogSegNo	segno; | 
|---|
| 98 |  | 
|---|
| 99 | XLByteToSeg(startpoint, segno, WalSegSz); | 
|---|
| 100 | XLogFileName(current_walfile_name, stream->timeline, segno, WalSegSz); | 
|---|
| 101 |  | 
|---|
| 102 | snprintf(fn, sizeof(fn), "%s%s", current_walfile_name, | 
|---|
| 103 | stream->partial_suffix ? stream->partial_suffix : ""); | 
|---|
| 104 |  | 
|---|
| 105 | /* | 
|---|
| 106 | * When streaming to files, if an existing file exists we verify that it's | 
|---|
| 107 | * either empty (just created), or a complete WalSegSz segment (in which | 
|---|
| 108 | * case it has been created and padded). Anything else indicates a corrupt | 
|---|
| 109 | * file. | 
|---|
| 110 | * | 
|---|
| 111 | * When streaming to tar, no file with this name will exist before, so we | 
|---|
| 112 | * never have to verify a size. | 
|---|
| 113 | */ | 
|---|
| 114 | if (stream->walmethod->existsfile(fn)) | 
|---|
| 115 | { | 
|---|
| 116 | size = stream->walmethod->get_file_size(fn); | 
|---|
| 117 | if (size < 0) | 
|---|
| 118 | { | 
|---|
| 119 | pg_log_error( "could not get size of write-ahead log file \"%s\": %s", | 
|---|
| 120 | fn, stream->walmethod->getlasterror()); | 
|---|
| 121 | return false; | 
|---|
| 122 | } | 
|---|
| 123 | if (size == WalSegSz) | 
|---|
| 124 | { | 
|---|
| 125 | /* Already padded file. Open it for use */ | 
|---|
| 126 | f = stream->walmethod->open_for_write(current_walfile_name, stream->partial_suffix, 0); | 
|---|
| 127 | if (f == NULL) | 
|---|
| 128 | { | 
|---|
| 129 | pg_log_error( "could not open existing write-ahead log file \"%s\": %s", | 
|---|
| 130 | fn, stream->walmethod->getlasterror()); | 
|---|
| 131 | return false; | 
|---|
| 132 | } | 
|---|
| 133 |  | 
|---|
| 134 | /* fsync file in case of a previous crash */ | 
|---|
| 135 | if (stream->walmethod->sync(f) != 0) | 
|---|
| 136 | { | 
|---|
| 137 | pg_log_error( "could not fsync existing write-ahead log file \"%s\": %s", | 
|---|
| 138 | fn, stream->walmethod->getlasterror()); | 
|---|
| 139 | stream->walmethod->close(f, CLOSE_UNLINK); | 
|---|
| 140 | return false; | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
| 143 | walfile = f; | 
|---|
| 144 | return true; | 
|---|
| 145 | } | 
|---|
| 146 | if (size != 0) | 
|---|
| 147 | { | 
|---|
| 148 | /* if write didn't set errno, assume problem is no disk space */ | 
|---|
| 149 | if (errno == 0) | 
|---|
| 150 | errno = ENOSPC; | 
|---|
| 151 | pg_log_error(ngettext( "write-ahead log file \"%s\" has %d byte, should be 0 or %d", | 
|---|
| 152 | "write-ahead log file \"%s\" has %d bytes, should be 0 or %d", | 
|---|
| 153 | size), | 
|---|
| 154 | fn, (int) size, WalSegSz); | 
|---|
| 155 | return false; | 
|---|
| 156 | } | 
|---|
| 157 | /* File existed and was empty, so fall through and open */ | 
|---|
| 158 | } | 
|---|
| 159 |  | 
|---|
| 160 | /* No file existed, so create one */ | 
|---|
| 161 |  | 
|---|
| 162 | f = stream->walmethod->open_for_write(current_walfile_name, | 
|---|
| 163 | stream->partial_suffix, WalSegSz); | 
|---|
| 164 | if (f == NULL) | 
|---|
| 165 | { | 
|---|
| 166 | pg_log_error( "could not open write-ahead log file \"%s\": %s", | 
|---|
| 167 | fn, stream->walmethod->getlasterror()); | 
|---|
| 168 | return false; | 
|---|
| 169 | } | 
|---|
| 170 |  | 
|---|
| 171 | walfile = f; | 
|---|
| 172 | return true; | 
|---|
| 173 | } | 
|---|
| 174 |  | 
|---|
| 175 | /* | 
|---|
| 176 | * Close the current WAL file (if open), and rename it to the correct | 
|---|
| 177 | * filename if it's complete. On failure, prints an error message to stderr | 
|---|
| 178 | * and returns false, otherwise returns true. | 
|---|
| 179 | */ | 
|---|
| 180 | static bool | 
|---|
| 181 | close_walfile(StreamCtl *stream, XLogRecPtr pos) | 
|---|
| 182 | { | 
|---|
| 183 | off_t		currpos; | 
|---|
| 184 | int			r; | 
|---|
| 185 |  | 
|---|
| 186 | if (walfile == NULL) | 
|---|
| 187 | return true; | 
|---|
| 188 |  | 
|---|
| 189 | currpos = stream->walmethod->get_current_pos(walfile); | 
|---|
| 190 | if (currpos == -1) | 
|---|
| 191 | { | 
|---|
| 192 | pg_log_error( "could not determine seek position in file \"%s\": %s", | 
|---|
| 193 | current_walfile_name, stream->walmethod->getlasterror()); | 
|---|
| 194 | stream->walmethod->close(walfile, CLOSE_UNLINK); | 
|---|
| 195 | walfile = NULL; | 
|---|
| 196 |  | 
|---|
| 197 | return false; | 
|---|
| 198 | } | 
|---|
| 199 |  | 
|---|
| 200 | if (stream->partial_suffix) | 
|---|
| 201 | { | 
|---|
| 202 | if (currpos == WalSegSz) | 
|---|
| 203 | r = stream->walmethod->close(walfile, CLOSE_NORMAL); | 
|---|
| 204 | else | 
|---|
| 205 | { | 
|---|
| 206 | pg_log_info( "not renaming \"%s%s\", segment is not complete", | 
|---|
| 207 | current_walfile_name, stream->partial_suffix); | 
|---|
| 208 | r = stream->walmethod->close(walfile, CLOSE_NO_RENAME); | 
|---|
| 209 | } | 
|---|
| 210 | } | 
|---|
| 211 | else | 
|---|
| 212 | r = stream->walmethod->close(walfile, CLOSE_NORMAL); | 
|---|
| 213 |  | 
|---|
| 214 | walfile = NULL; | 
|---|
| 215 |  | 
|---|
| 216 | if (r != 0) | 
|---|
| 217 | { | 
|---|
| 218 | pg_log_error( "could not close file \"%s\": %s", | 
|---|
| 219 | current_walfile_name, stream->walmethod->getlasterror()); | 
|---|
| 220 | return false; | 
|---|
| 221 | } | 
|---|
| 222 |  | 
|---|
| 223 | /* | 
|---|
| 224 | * Mark file as archived if requested by the caller - pg_basebackup needs | 
|---|
| 225 | * to do so as files can otherwise get archived again after promotion of a | 
|---|
| 226 | * new node. This is in line with walreceiver.c always doing a | 
|---|
| 227 | * XLogArchiveForceDone() after a complete segment. | 
|---|
| 228 | */ | 
|---|
| 229 | if (currpos == WalSegSz && stream->mark_done) | 
|---|
| 230 | { | 
|---|
| 231 | /* writes error message if failed */ | 
|---|
| 232 | if (!mark_file_as_archived(stream, current_walfile_name)) | 
|---|
| 233 | return false; | 
|---|
| 234 | } | 
|---|
| 235 |  | 
|---|
| 236 | lastFlushPosition = pos; | 
|---|
| 237 | return true; | 
|---|
| 238 | } | 
|---|
| 239 |  | 
|---|
| 240 |  | 
|---|
| 241 | /* | 
|---|
| 242 | * Check if a timeline history file exists. | 
|---|
| 243 | */ | 
|---|
| 244 | static bool | 
|---|
| 245 | existsTimeLineHistoryFile(StreamCtl *stream) | 
|---|
| 246 | { | 
|---|
| 247 | char		histfname[MAXFNAMELEN]; | 
|---|
| 248 |  | 
|---|
| 249 | /* | 
|---|
| 250 | * Timeline 1 never has a history file. We treat that as if it existed, | 
|---|
| 251 | * since we never need to stream it. | 
|---|
| 252 | */ | 
|---|
| 253 | if (stream->timeline == 1) | 
|---|
| 254 | return true; | 
|---|
| 255 |  | 
|---|
| 256 | TLHistoryFileName(histfname, stream->timeline); | 
|---|
| 257 |  | 
|---|
| 258 | return stream->walmethod->existsfile(histfname); | 
|---|
| 259 | } | 
|---|
| 260 |  | 
|---|
| 261 | static bool | 
|---|
| 262 | writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content) | 
|---|
| 263 | { | 
|---|
| 264 | int			size = strlen(content); | 
|---|
| 265 | char		histfname[MAXFNAMELEN]; | 
|---|
| 266 | Walfile    *f; | 
|---|
| 267 |  | 
|---|
| 268 | /* | 
|---|
| 269 | * Check that the server's idea of how timeline history files should be | 
|---|
| 270 | * named matches ours. | 
|---|
| 271 | */ | 
|---|
| 272 | TLHistoryFileName(histfname, stream->timeline); | 
|---|
| 273 | if (strcmp(histfname, filename) != 0) | 
|---|
| 274 | { | 
|---|
| 275 | pg_log_error( "server reported unexpected history file name for timeline %u: %s", | 
|---|
| 276 | stream->timeline, filename); | 
|---|
| 277 | return false; | 
|---|
| 278 | } | 
|---|
| 279 |  | 
|---|
| 280 | f = stream->walmethod->open_for_write(histfname, ".tmp", 0); | 
|---|
| 281 | if (f == NULL) | 
|---|
| 282 | { | 
|---|
| 283 | pg_log_error( "could not create timeline history file \"%s\": %s", | 
|---|
| 284 | histfname, stream->walmethod->getlasterror()); | 
|---|
| 285 | return false; | 
|---|
| 286 | } | 
|---|
| 287 |  | 
|---|
| 288 | if ((int) stream->walmethod->write(f, content, size) != size) | 
|---|
| 289 | { | 
|---|
| 290 | pg_log_error( "could not write timeline history file \"%s\": %s", | 
|---|
| 291 | histfname, stream->walmethod->getlasterror()); | 
|---|
| 292 |  | 
|---|
| 293 | /* | 
|---|
| 294 | * If we fail to make the file, delete it to release disk space | 
|---|
| 295 | */ | 
|---|
| 296 | stream->walmethod->close(f, CLOSE_UNLINK); | 
|---|
| 297 |  | 
|---|
| 298 | return false; | 
|---|
| 299 | } | 
|---|
| 300 |  | 
|---|
| 301 | if (stream->walmethod->close(f, CLOSE_NORMAL) != 0) | 
|---|
| 302 | { | 
|---|
| 303 | pg_log_error( "could not close file \"%s\": %s", | 
|---|
| 304 | histfname, stream->walmethod->getlasterror()); | 
|---|
| 305 | return false; | 
|---|
| 306 | } | 
|---|
| 307 |  | 
|---|
| 308 | /* Maintain archive_status, check close_walfile() for details. */ | 
|---|
| 309 | if (stream->mark_done) | 
|---|
| 310 | { | 
|---|
| 311 | /* writes error message if failed */ | 
|---|
| 312 | if (!mark_file_as_archived(stream, histfname)) | 
|---|
| 313 | return false; | 
|---|
| 314 | } | 
|---|
| 315 |  | 
|---|
| 316 | return true; | 
|---|
| 317 | } | 
|---|
| 318 |  | 
|---|
| 319 | /* | 
|---|
| 320 | * Send a Standby Status Update message to server. | 
|---|
| 321 | */ | 
|---|
| 322 | static bool | 
|---|
| 323 | sendFeedback(PGconn *conn, XLogRecPtr blockpos, TimestampTz now, bool replyRequested) | 
|---|
| 324 | { | 
|---|
| 325 | char		replybuf[1 + 8 + 8 + 8 + 8 + 1]; | 
|---|
| 326 | int			len = 0; | 
|---|
| 327 |  | 
|---|
| 328 | replybuf[len] = 'r'; | 
|---|
| 329 | len += 1; | 
|---|
| 330 | fe_sendint64(blockpos, &replybuf[len]); /* write */ | 
|---|
| 331 | len += 8; | 
|---|
| 332 | if (reportFlushPosition) | 
|---|
| 333 | fe_sendint64(lastFlushPosition, &replybuf[len]);	/* flush */ | 
|---|
| 334 | else | 
|---|
| 335 | fe_sendint64(InvalidXLogRecPtr, &replybuf[len]);	/* flush */ | 
|---|
| 336 | len += 8; | 
|---|
| 337 | fe_sendint64(InvalidXLogRecPtr, &replybuf[len]);	/* apply */ | 
|---|
| 338 | len += 8; | 
|---|
| 339 | fe_sendint64(now, &replybuf[len]);	/* sendTime */ | 
|---|
| 340 | len += 8; | 
|---|
| 341 | replybuf[len] = replyRequested ? 1 : 0; /* replyRequested */ | 
|---|
| 342 | len += 1; | 
|---|
| 343 |  | 
|---|
| 344 | if (PQputCopyData(conn, replybuf, len) <= 0 || PQflush(conn)) | 
|---|
| 345 | { | 
|---|
| 346 | pg_log_error( "could not send feedback packet: %s", | 
|---|
| 347 | PQerrorMessage(conn)); | 
|---|
| 348 | return false; | 
|---|
| 349 | } | 
|---|
| 350 |  | 
|---|
| 351 | return true; | 
|---|
| 352 | } | 
|---|
| 353 |  | 
|---|
| 354 | /* | 
|---|
| 355 | * Check that the server version we're connected to is supported by | 
|---|
| 356 | * ReceiveXlogStream(). | 
|---|
| 357 | * | 
|---|
| 358 | * If it's not, an error message is printed to stderr, and false is returned. | 
|---|
| 359 | */ | 
|---|
| 360 | bool | 
|---|
| 361 | CheckServerVersionForStreaming(PGconn *conn) | 
|---|
| 362 | { | 
|---|
| 363 | int			minServerMajor, | 
|---|
| 364 | maxServerMajor; | 
|---|
| 365 | int			serverMajor; | 
|---|
| 366 |  | 
|---|
| 367 | /* | 
|---|
| 368 | * The message format used in streaming replication changed in 9.3, so we | 
|---|
| 369 | * cannot stream from older servers. And we don't support servers newer | 
|---|
| 370 | * than the client; it might work, but we don't know, so err on the safe | 
|---|
| 371 | * side. | 
|---|
| 372 | */ | 
|---|
| 373 | minServerMajor = 903; | 
|---|
| 374 | maxServerMajor = PG_VERSION_NUM / 100; | 
|---|
| 375 | serverMajor = PQserverVersion(conn) / 100; | 
|---|
| 376 | if (serverMajor < minServerMajor) | 
|---|
| 377 | { | 
|---|
| 378 | const char *serverver = PQparameterStatus(conn, "server_version"); | 
|---|
| 379 |  | 
|---|
| 380 | pg_log_error( "incompatible server version %s; client does not support streaming from server versions older than %s", | 
|---|
| 381 | serverver ? serverver : "'unknown'", | 
|---|
| 382 | "9.3"); | 
|---|
| 383 | return false; | 
|---|
| 384 | } | 
|---|
| 385 | else if (serverMajor > maxServerMajor) | 
|---|
| 386 | { | 
|---|
| 387 | const char *serverver = PQparameterStatus(conn, "server_version"); | 
|---|
| 388 |  | 
|---|
| 389 | pg_log_error( "incompatible server version %s; client does not support streaming from server versions newer than %s", | 
|---|
| 390 | serverver ? serverver : "'unknown'", | 
|---|
| 391 | PG_VERSION); | 
|---|
| 392 | return false; | 
|---|
| 393 | } | 
|---|
| 394 | return true; | 
|---|
| 395 | } | 
|---|
| 396 |  | 
|---|
| 397 | /* | 
|---|
| 398 | * Receive a log stream starting at the specified position. | 
|---|
| 399 | * | 
|---|
| 400 | * Individual parameters are passed through the StreamCtl structure. | 
|---|
| 401 | * | 
|---|
| 402 | * If sysidentifier is specified, validate that both the system | 
|---|
| 403 | * identifier and the timeline matches the specified ones | 
|---|
| 404 | * (by sending an extra IDENTIFY_SYSTEM command) | 
|---|
| 405 | * | 
|---|
| 406 | * All received segments will be written to the directory | 
|---|
| 407 | * specified by basedir. This will also fetch any missing timeline history | 
|---|
| 408 | * files. | 
|---|
| 409 | * | 
|---|
| 410 | * The stream_stop callback will be called every time data | 
|---|
| 411 | * is received, and whenever a segment is completed. If it returns | 
|---|
| 412 | * true, the streaming will stop and the function | 
|---|
| 413 | * return. As long as it returns false, streaming will continue | 
|---|
| 414 | * indefinitely. | 
|---|
| 415 | * | 
|---|
| 416 | * If stream_stop() checks for external input, stop_socket should be set to | 
|---|
| 417 | * the FD it checks.  This will allow such input to be detected promptly | 
|---|
| 418 | * rather than after standby_message_timeout (which might be indefinite). | 
|---|
| 419 | * Note that signals will interrupt waits for input as well, but that is | 
|---|
| 420 | * race-y since a signal received while busy won't interrupt the wait. | 
|---|
| 421 | * | 
|---|
| 422 | * standby_message_timeout controls how often we send a message | 
|---|
| 423 | * back to the master letting it know our progress, in milliseconds. | 
|---|
| 424 | * Zero means no messages are sent. | 
|---|
| 425 | * This message will only contain the write location, and never | 
|---|
| 426 | * flush or replay. | 
|---|
| 427 | * | 
|---|
| 428 | * If 'partial_suffix' is not NULL, files are initially created with the | 
|---|
| 429 | * given suffix, and the suffix is removed once the file is finished. That | 
|---|
| 430 | * allows you to tell the difference between partial and completed files, | 
|---|
| 431 | * so that you can continue later where you left. | 
|---|
| 432 | * | 
|---|
| 433 | * If 'synchronous' is true, the received WAL is flushed as soon as written, | 
|---|
| 434 | * otherwise only when the WAL file is closed. | 
|---|
| 435 | * | 
|---|
| 436 | * Note: The WAL location *must* be at a log segment start! | 
|---|
| 437 | */ | 
|---|
| 438 | bool | 
|---|
| 439 | ReceiveXlogStream(PGconn *conn, StreamCtl *stream) | 
|---|
| 440 | { | 
|---|
| 441 | char		query[128]; | 
|---|
| 442 | char		slotcmd[128]; | 
|---|
| 443 | PGresult   *res; | 
|---|
| 444 | XLogRecPtr	stoppos; | 
|---|
| 445 |  | 
|---|
| 446 | /* | 
|---|
| 447 | * The caller should've checked the server version already, but doesn't do | 
|---|
| 448 | * any harm to check it here too. | 
|---|
| 449 | */ | 
|---|
| 450 | if (!CheckServerVersionForStreaming(conn)) | 
|---|
| 451 | return false; | 
|---|
| 452 |  | 
|---|
| 453 | /* | 
|---|
| 454 | * Decide whether we want to report the flush position. If we report the | 
|---|
| 455 | * flush position, the primary will know what WAL we'll possibly | 
|---|
| 456 | * re-request, and it can then remove older WAL safely. We must always do | 
|---|
| 457 | * that when we are using slots. | 
|---|
| 458 | * | 
|---|
| 459 | * Reporting the flush position makes one eligible as a synchronous | 
|---|
| 460 | * replica. People shouldn't include generic names in | 
|---|
| 461 | * synchronous_standby_names, but we've protected them against it so far, | 
|---|
| 462 | * so let's continue to do so unless specifically requested. | 
|---|
| 463 | */ | 
|---|
| 464 | if (stream->replication_slot != NULL) | 
|---|
| 465 | { | 
|---|
| 466 | reportFlushPosition = true; | 
|---|
| 467 | sprintf(slotcmd, "SLOT \"%s\" ", stream->replication_slot); | 
|---|
| 468 | } | 
|---|
| 469 | else | 
|---|
| 470 | { | 
|---|
| 471 | if (stream->synchronous) | 
|---|
| 472 | reportFlushPosition = true; | 
|---|
| 473 | else | 
|---|
| 474 | reportFlushPosition = false; | 
|---|
| 475 | slotcmd[0] = 0; | 
|---|
| 476 | } | 
|---|
| 477 |  | 
|---|
| 478 | if (stream->sysidentifier != NULL) | 
|---|
| 479 | { | 
|---|
| 480 | /* Validate system identifier hasn't changed */ | 
|---|
| 481 | res = PQexec(conn, "IDENTIFY_SYSTEM"); | 
|---|
| 482 | if (PQresultStatus(res) != PGRES_TUPLES_OK) | 
|---|
| 483 | { | 
|---|
| 484 | pg_log_error( "could not send replication command \"%s\": %s", | 
|---|
| 485 | "IDENTIFY_SYSTEM", PQerrorMessage(conn)); | 
|---|
| 486 | PQclear(res); | 
|---|
| 487 | return false; | 
|---|
| 488 | } | 
|---|
| 489 | if (PQntuples(res) != 1 || PQnfields(res) < 3) | 
|---|
| 490 | { | 
|---|
| 491 | pg_log_error( "could not identify system: got %d rows and %d fields, expected %d rows and %d or more fields", | 
|---|
| 492 | PQntuples(res), PQnfields(res), 1, 3); | 
|---|
| 493 | PQclear(res); | 
|---|
| 494 | return false; | 
|---|
| 495 | } | 
|---|
| 496 | if (strcmp(stream->sysidentifier, PQgetvalue(res, 0, 0)) != 0) | 
|---|
| 497 | { | 
|---|
| 498 | pg_log_error( "system identifier does not match between base backup and streaming connection"); | 
|---|
| 499 | PQclear(res); | 
|---|
| 500 | return false; | 
|---|
| 501 | } | 
|---|
| 502 | if (stream->timeline > atoi(PQgetvalue(res, 0, 1))) | 
|---|
| 503 | { | 
|---|
| 504 | pg_log_error( "starting timeline %u is not present in the server", | 
|---|
| 505 | stream->timeline); | 
|---|
| 506 | PQclear(res); | 
|---|
| 507 | return false; | 
|---|
| 508 | } | 
|---|
| 509 | PQclear(res); | 
|---|
| 510 | } | 
|---|
| 511 |  | 
|---|
| 512 | /* | 
|---|
| 513 | * initialize flush position to starting point, it's the caller's | 
|---|
| 514 | * responsibility that that's sane. | 
|---|
| 515 | */ | 
|---|
| 516 | lastFlushPosition = stream->startpos; | 
|---|
| 517 |  | 
|---|
| 518 | while (1) | 
|---|
| 519 | { | 
|---|
| 520 | /* | 
|---|
| 521 | * Fetch the timeline history file for this timeline, if we don't have | 
|---|
| 522 | * it already. When streaming log to tar, this will always return | 
|---|
| 523 | * false, as we are never streaming into an existing file and | 
|---|
| 524 | * therefore there can be no pre-existing timeline history file. | 
|---|
| 525 | */ | 
|---|
| 526 | if (!existsTimeLineHistoryFile(stream)) | 
|---|
| 527 | { | 
|---|
| 528 | snprintf(query, sizeof(query), "TIMELINE_HISTORY %u", stream->timeline); | 
|---|
| 529 | res = PQexec(conn, query); | 
|---|
| 530 | if (PQresultStatus(res) != PGRES_TUPLES_OK) | 
|---|
| 531 | { | 
|---|
| 532 | /* FIXME: we might send it ok, but get an error */ | 
|---|
| 533 | pg_log_error( "could not send replication command \"%s\": %s", | 
|---|
| 534 | "TIMELINE_HISTORY", PQresultErrorMessage(res)); | 
|---|
| 535 | PQclear(res); | 
|---|
| 536 | return false; | 
|---|
| 537 | } | 
|---|
| 538 |  | 
|---|
| 539 | /* | 
|---|
| 540 | * The response to TIMELINE_HISTORY is a single row result set | 
|---|
| 541 | * with two fields: filename and content | 
|---|
| 542 | */ | 
|---|
| 543 | if (PQnfields(res) != 2 || PQntuples(res) != 1) | 
|---|
| 544 | { | 
|---|
| 545 | pg_log_warning( "unexpected response to TIMELINE_HISTORY command: got %d rows and %d fields, expected %d rows and %d fields", | 
|---|
| 546 | PQntuples(res), PQnfields(res), 1, 2); | 
|---|
| 547 | } | 
|---|
| 548 |  | 
|---|
| 549 | /* Write the history file to disk */ | 
|---|
| 550 | writeTimeLineHistoryFile(stream, | 
|---|
| 551 | PQgetvalue(res, 0, 0), | 
|---|
| 552 | PQgetvalue(res, 0, 1)); | 
|---|
| 553 |  | 
|---|
| 554 | PQclear(res); | 
|---|
| 555 | } | 
|---|
| 556 |  | 
|---|
| 557 | /* | 
|---|
| 558 | * Before we start streaming from the requested location, check if the | 
|---|
| 559 | * callback tells us to stop here. | 
|---|
| 560 | */ | 
|---|
| 561 | if (stream->stream_stop(stream->startpos, stream->timeline, false)) | 
|---|
| 562 | return true; | 
|---|
| 563 |  | 
|---|
| 564 | /* Initiate the replication stream at specified location */ | 
|---|
| 565 | snprintf(query, sizeof(query), "START_REPLICATION %s%X/%X TIMELINE %u", | 
|---|
| 566 | slotcmd, | 
|---|
| 567 | (uint32) (stream->startpos >> 32), (uint32) stream->startpos, | 
|---|
| 568 | stream->timeline); | 
|---|
| 569 | res = PQexec(conn, query); | 
|---|
| 570 | if (PQresultStatus(res) != PGRES_COPY_BOTH) | 
|---|
| 571 | { | 
|---|
| 572 | pg_log_error( "could not send replication command \"%s\": %s", | 
|---|
| 573 | "START_REPLICATION", PQresultErrorMessage(res)); | 
|---|
| 574 | PQclear(res); | 
|---|
| 575 | return false; | 
|---|
| 576 | } | 
|---|
| 577 | PQclear(res); | 
|---|
| 578 |  | 
|---|
| 579 | /* Stream the WAL */ | 
|---|
| 580 | res = HandleCopyStream(conn, stream, &stoppos); | 
|---|
| 581 | if (res == NULL) | 
|---|
| 582 | goto error; | 
|---|
| 583 |  | 
|---|
| 584 | /* | 
|---|
| 585 | * Streaming finished. | 
|---|
| 586 | * | 
|---|
| 587 | * There are two possible reasons for that: a controlled shutdown, or | 
|---|
| 588 | * we reached the end of the current timeline. In case of | 
|---|
| 589 | * end-of-timeline, the server sends a result set after Copy has | 
|---|
| 590 | * finished, containing information about the next timeline. Read | 
|---|
| 591 | * that, and restart streaming from the next timeline. In case of | 
|---|
| 592 | * controlled shutdown, stop here. | 
|---|
| 593 | */ | 
|---|
| 594 | if (PQresultStatus(res) == PGRES_TUPLES_OK) | 
|---|
| 595 | { | 
|---|
| 596 | /* | 
|---|
| 597 | * End-of-timeline. Read the next timeline's ID and starting | 
|---|
| 598 | * position. Usually, the starting position will match the end of | 
|---|
| 599 | * the previous timeline, but there are corner cases like if the | 
|---|
| 600 | * server had sent us half of a WAL record, when it was promoted. | 
|---|
| 601 | * The new timeline will begin at the end of the last complete | 
|---|
| 602 | * record in that case, overlapping the partial WAL record on the | 
|---|
| 603 | * old timeline. | 
|---|
| 604 | */ | 
|---|
| 605 | uint32		newtimeline; | 
|---|
| 606 | bool		parsed; | 
|---|
| 607 |  | 
|---|
| 608 | parsed = ReadEndOfStreamingResult(res, &stream->startpos, &newtimeline); | 
|---|
| 609 | PQclear(res); | 
|---|
| 610 | if (!parsed) | 
|---|
| 611 | goto error; | 
|---|
| 612 |  | 
|---|
| 613 | /* Sanity check the values the server gave us */ | 
|---|
| 614 | if (newtimeline <= stream->timeline) | 
|---|
| 615 | { | 
|---|
| 616 | pg_log_error( "server reported unexpected next timeline %u, following timeline %u", | 
|---|
| 617 | newtimeline, stream->timeline); | 
|---|
| 618 | goto error; | 
|---|
| 619 | } | 
|---|
| 620 | if (stream->startpos > stoppos) | 
|---|
| 621 | { | 
|---|
| 622 | pg_log_error( "server stopped streaming timeline %u at %X/%X, but reported next timeline %u to begin at %X/%X", | 
|---|
| 623 | stream->timeline, (uint32) (stoppos >> 32), (uint32) stoppos, | 
|---|
| 624 | newtimeline, (uint32) (stream->startpos >> 32), (uint32) stream->startpos); | 
|---|
| 625 | goto error; | 
|---|
| 626 | } | 
|---|
| 627 |  | 
|---|
| 628 | /* Read the final result, which should be CommandComplete. */ | 
|---|
| 629 | res = PQgetResult(conn); | 
|---|
| 630 | if (PQresultStatus(res) != PGRES_COMMAND_OK) | 
|---|
| 631 | { | 
|---|
| 632 | pg_log_error( "unexpected termination of replication stream: %s", | 
|---|
| 633 | PQresultErrorMessage(res)); | 
|---|
| 634 | PQclear(res); | 
|---|
| 635 | goto error; | 
|---|
| 636 | } | 
|---|
| 637 | PQclear(res); | 
|---|
| 638 |  | 
|---|
| 639 | /* | 
|---|
| 640 | * Loop back to start streaming from the new timeline. Always | 
|---|
| 641 | * start streaming at the beginning of a segment. | 
|---|
| 642 | */ | 
|---|
| 643 | stream->timeline = newtimeline; | 
|---|
| 644 | stream->startpos = stream->startpos - | 
|---|
| 645 | XLogSegmentOffset(stream->startpos, WalSegSz); | 
|---|
| 646 | continue; | 
|---|
| 647 | } | 
|---|
| 648 | else if (PQresultStatus(res) == PGRES_COMMAND_OK) | 
|---|
| 649 | { | 
|---|
| 650 | PQclear(res); | 
|---|
| 651 |  | 
|---|
| 652 | /* | 
|---|
| 653 | * End of replication (ie. controlled shut down of the server). | 
|---|
| 654 | * | 
|---|
| 655 | * Check if the callback thinks it's OK to stop here. If not, | 
|---|
| 656 | * complain. | 
|---|
| 657 | */ | 
|---|
| 658 | if (stream->stream_stop(stoppos, stream->timeline, false)) | 
|---|
| 659 | return true; | 
|---|
| 660 | else | 
|---|
| 661 | { | 
|---|
| 662 | pg_log_error( "replication stream was terminated before stop point"); | 
|---|
| 663 | goto error; | 
|---|
| 664 | } | 
|---|
| 665 | } | 
|---|
| 666 | else | 
|---|
| 667 | { | 
|---|
| 668 | /* Server returned an error. */ | 
|---|
| 669 | pg_log_error( "unexpected termination of replication stream: %s", | 
|---|
| 670 | PQresultErrorMessage(res)); | 
|---|
| 671 | PQclear(res); | 
|---|
| 672 | goto error; | 
|---|
| 673 | } | 
|---|
| 674 | } | 
|---|
| 675 |  | 
|---|
| 676 | error: | 
|---|
| 677 | if (walfile != NULL && stream->walmethod->close(walfile, CLOSE_NO_RENAME) != 0) | 
|---|
| 678 | pg_log_error( "could not close file \"%s\": %s", | 
|---|
| 679 | current_walfile_name, stream->walmethod->getlasterror()); | 
|---|
| 680 | walfile = NULL; | 
|---|
| 681 | return false; | 
|---|
| 682 | } | 
|---|
| 683 |  | 
|---|
| 684 | /* | 
|---|
| 685 | * Helper function to parse the result set returned by server after streaming | 
|---|
| 686 | * has finished. On failure, prints an error to stderr and returns false. | 
|---|
| 687 | */ | 
|---|
| 688 | static bool | 
|---|
| 689 | ReadEndOfStreamingResult(PGresult *res, XLogRecPtr *startpos, uint32 *timeline) | 
|---|
| 690 | { | 
|---|
| 691 | uint32		startpos_xlogid, | 
|---|
| 692 | startpos_xrecoff; | 
|---|
| 693 |  | 
|---|
| 694 | /*---------- | 
|---|
| 695 | * The result set consists of one row and two columns, e.g: | 
|---|
| 696 | * | 
|---|
| 697 | *	next_tli | next_tli_startpos | 
|---|
| 698 | * ----------+------------------- | 
|---|
| 699 | *		   4 | 0/9949AE0 | 
|---|
| 700 | * | 
|---|
| 701 | * next_tli is the timeline ID of the next timeline after the one that | 
|---|
| 702 | * just finished streaming. next_tli_startpos is the WAL location where | 
|---|
| 703 | * the server switched to it. | 
|---|
| 704 | *---------- | 
|---|
| 705 | */ | 
|---|
| 706 | if (PQnfields(res) < 2 || PQntuples(res) != 1) | 
|---|
| 707 | { | 
|---|
| 708 | pg_log_error( "unexpected result set after end-of-timeline: got %d rows and %d fields, expected %d rows and %d fields", | 
|---|
| 709 | PQntuples(res), PQnfields(res), 1, 2); | 
|---|
| 710 | return false; | 
|---|
| 711 | } | 
|---|
| 712 |  | 
|---|
| 713 | *timeline = atoi(PQgetvalue(res, 0, 0)); | 
|---|
| 714 | if (sscanf(PQgetvalue(res, 0, 1), "%X/%X", &startpos_xlogid, | 
|---|
| 715 | &startpos_xrecoff) != 2) | 
|---|
| 716 | { | 
|---|
| 717 | pg_log_error( "could not parse next timeline's starting point \"%s\"", | 
|---|
| 718 | PQgetvalue(res, 0, 1)); | 
|---|
| 719 | return false; | 
|---|
| 720 | } | 
|---|
| 721 | *startpos = ((uint64) startpos_xlogid << 32) | startpos_xrecoff; | 
|---|
| 722 |  | 
|---|
| 723 | return true; | 
|---|
| 724 | } | 
|---|
| 725 |  | 
|---|
| 726 | /* | 
|---|
| 727 | * The main loop of ReceiveXlogStream. Handles the COPY stream after | 
|---|
| 728 | * initiating streaming with the START_REPLICATION command. | 
|---|
| 729 | * | 
|---|
| 730 | * If the COPY ends (not necessarily successfully) due a message from the | 
|---|
| 731 | * server, returns a PGresult and sets *stoppos to the last byte written. | 
|---|
| 732 | * On any other sort of error, returns NULL. | 
|---|
| 733 | */ | 
|---|
| 734 | static PGresult * | 
|---|
| 735 | HandleCopyStream(PGconn *conn, StreamCtl *stream, | 
|---|
| 736 | XLogRecPtr *stoppos) | 
|---|
| 737 | { | 
|---|
| 738 | char	   *copybuf = NULL; | 
|---|
| 739 | TimestampTz last_status = -1; | 
|---|
| 740 | XLogRecPtr	blockpos = stream->startpos; | 
|---|
| 741 |  | 
|---|
| 742 | still_sending = true; | 
|---|
| 743 |  | 
|---|
| 744 | while (1) | 
|---|
| 745 | { | 
|---|
| 746 | int			r; | 
|---|
| 747 | TimestampTz now; | 
|---|
| 748 | long		sleeptime; | 
|---|
| 749 |  | 
|---|
| 750 | /* | 
|---|
| 751 | * Check if we should continue streaming, or abort at this point. | 
|---|
| 752 | */ | 
|---|
| 753 | if (!CheckCopyStreamStop(conn, stream, blockpos, stoppos)) | 
|---|
| 754 | goto error; | 
|---|
| 755 |  | 
|---|
| 756 | now = feGetCurrentTimestamp(); | 
|---|
| 757 |  | 
|---|
| 758 | /* | 
|---|
| 759 | * If synchronous option is true, issue sync command as soon as there | 
|---|
| 760 | * are WAL data which has not been flushed yet. | 
|---|
| 761 | */ | 
|---|
| 762 | if (stream->synchronous && lastFlushPosition < blockpos && walfile != NULL) | 
|---|
| 763 | { | 
|---|
| 764 | if (stream->walmethod->sync(walfile) != 0) | 
|---|
| 765 | { | 
|---|
| 766 | pg_log_error( "could not fsync file \"%s\": %s", | 
|---|
| 767 | current_walfile_name, stream->walmethod->getlasterror()); | 
|---|
| 768 | goto error; | 
|---|
| 769 | } | 
|---|
| 770 | lastFlushPosition = blockpos; | 
|---|
| 771 |  | 
|---|
| 772 | /* | 
|---|
| 773 | * Send feedback so that the server sees the latest WAL locations | 
|---|
| 774 | * immediately. | 
|---|
| 775 | */ | 
|---|
| 776 | if (!sendFeedback(conn, blockpos, now, false)) | 
|---|
| 777 | goto error; | 
|---|
| 778 | last_status = now; | 
|---|
| 779 | } | 
|---|
| 780 |  | 
|---|
| 781 | /* | 
|---|
| 782 | * Potentially send a status message to the master | 
|---|
| 783 | */ | 
|---|
| 784 | if (still_sending && stream->standby_message_timeout > 0 && | 
|---|
| 785 | feTimestampDifferenceExceeds(last_status, now, | 
|---|
| 786 | stream->standby_message_timeout)) | 
|---|
| 787 | { | 
|---|
| 788 | /* Time to send feedback! */ | 
|---|
| 789 | if (!sendFeedback(conn, blockpos, now, false)) | 
|---|
| 790 | goto error; | 
|---|
| 791 | last_status = now; | 
|---|
| 792 | } | 
|---|
| 793 |  | 
|---|
| 794 | /* | 
|---|
| 795 | * Calculate how long send/receive loops should sleep | 
|---|
| 796 | */ | 
|---|
| 797 | sleeptime = CalculateCopyStreamSleeptime(now, stream->standby_message_timeout, | 
|---|
| 798 | last_status); | 
|---|
| 799 |  | 
|---|
| 800 | r = CopyStreamReceive(conn, sleeptime, stream->stop_socket, ©buf); | 
|---|
| 801 | while (r != 0) | 
|---|
| 802 | { | 
|---|
| 803 | if (r == -1) | 
|---|
| 804 | goto error; | 
|---|
| 805 | if (r == -2) | 
|---|
| 806 | { | 
|---|
| 807 | PGresult   *res = HandleEndOfCopyStream(conn, stream, copybuf, blockpos, stoppos); | 
|---|
| 808 |  | 
|---|
| 809 | if (res == NULL) | 
|---|
| 810 | goto error; | 
|---|
| 811 | else | 
|---|
| 812 | return res; | 
|---|
| 813 | } | 
|---|
| 814 |  | 
|---|
| 815 | /* Check the message type. */ | 
|---|
| 816 | if (copybuf[0] == 'k') | 
|---|
| 817 | { | 
|---|
| 818 | if (!ProcessKeepaliveMsg(conn, stream, copybuf, r, blockpos, | 
|---|
| 819 | &last_status)) | 
|---|
| 820 | goto error; | 
|---|
| 821 | } | 
|---|
| 822 | else if (copybuf[0] == 'w') | 
|---|
| 823 | { | 
|---|
| 824 | if (!ProcessXLogDataMsg(conn, stream, copybuf, r, &blockpos)) | 
|---|
| 825 | goto error; | 
|---|
| 826 |  | 
|---|
| 827 | /* | 
|---|
| 828 | * Check if we should continue streaming, or abort at this | 
|---|
| 829 | * point. | 
|---|
| 830 | */ | 
|---|
| 831 | if (!CheckCopyStreamStop(conn, stream, blockpos, stoppos)) | 
|---|
| 832 | goto error; | 
|---|
| 833 | } | 
|---|
| 834 | else | 
|---|
| 835 | { | 
|---|
| 836 | pg_log_error( "unrecognized streaming header: \"%c\"", | 
|---|
| 837 | copybuf[0]); | 
|---|
| 838 | goto error; | 
|---|
| 839 | } | 
|---|
| 840 |  | 
|---|
| 841 | /* | 
|---|
| 842 | * Process the received data, and any subsequent data we can read | 
|---|
| 843 | * without blocking. | 
|---|
| 844 | */ | 
|---|
| 845 | r = CopyStreamReceive(conn, 0, stream->stop_socket, ©buf); | 
|---|
| 846 | } | 
|---|
| 847 | } | 
|---|
| 848 |  | 
|---|
| 849 | error: | 
|---|
| 850 | if (copybuf != NULL) | 
|---|
| 851 | PQfreemem(copybuf); | 
|---|
| 852 | return NULL; | 
|---|
| 853 | } | 
|---|
| 854 |  | 
|---|
| 855 | /* | 
|---|
| 856 | * Wait until we can read a CopyData message, | 
|---|
| 857 | * or timeout, or occurrence of a signal or input on the stop_socket. | 
|---|
| 858 | * (timeout_ms < 0 means wait indefinitely; 0 means don't wait.) | 
|---|
| 859 | * | 
|---|
| 860 | * Returns 1 if data has become available for reading, 0 if timed out | 
|---|
| 861 | * or interrupted by signal or stop_socket input, and -1 on an error. | 
|---|
| 862 | */ | 
|---|
| 863 | static int | 
|---|
| 864 | CopyStreamPoll(PGconn *conn, long timeout_ms, pgsocket stop_socket) | 
|---|
| 865 | { | 
|---|
| 866 | int			ret; | 
|---|
| 867 | fd_set		input_mask; | 
|---|
| 868 | int			connsocket; | 
|---|
| 869 | int			maxfd; | 
|---|
| 870 | struct timeval timeout; | 
|---|
| 871 | struct timeval *timeoutptr; | 
|---|
| 872 |  | 
|---|
| 873 | connsocket = PQsocket(conn); | 
|---|
| 874 | if (connsocket < 0) | 
|---|
| 875 | { | 
|---|
| 876 | pg_log_error( "invalid socket: %s", PQerrorMessage(conn)); | 
|---|
| 877 | return -1; | 
|---|
| 878 | } | 
|---|
| 879 |  | 
|---|
| 880 | FD_ZERO(&input_mask); | 
|---|
| 881 | FD_SET(connsocket, &input_mask); | 
|---|
| 882 | maxfd = connsocket; | 
|---|
| 883 | if (stop_socket != PGINVALID_SOCKET) | 
|---|
| 884 | { | 
|---|
| 885 | FD_SET(stop_socket, &input_mask); | 
|---|
| 886 | maxfd = Max(maxfd, stop_socket); | 
|---|
| 887 | } | 
|---|
| 888 |  | 
|---|
| 889 | if (timeout_ms < 0) | 
|---|
| 890 | timeoutptr = NULL; | 
|---|
| 891 | else | 
|---|
| 892 | { | 
|---|
| 893 | timeout.tv_sec = timeout_ms / 1000L; | 
|---|
| 894 | timeout.tv_usec = (timeout_ms % 1000L) * 1000L; | 
|---|
| 895 | timeoutptr = &timeout; | 
|---|
| 896 | } | 
|---|
| 897 |  | 
|---|
| 898 | ret = select(maxfd + 1, &input_mask, NULL, NULL, timeoutptr); | 
|---|
| 899 |  | 
|---|
| 900 | if (ret < 0) | 
|---|
| 901 | { | 
|---|
| 902 | if (errno == EINTR) | 
|---|
| 903 | return 0;			/* Got a signal, so not an error */ | 
|---|
| 904 | pg_log_error( "select() failed: %m"); | 
|---|
| 905 | return -1; | 
|---|
| 906 | } | 
|---|
| 907 | if (ret > 0 && FD_ISSET(connsocket, &input_mask)) | 
|---|
| 908 | return 1;				/* Got input on connection socket */ | 
|---|
| 909 |  | 
|---|
| 910 | return 0;					/* Got timeout or input on stop_socket */ | 
|---|
| 911 | } | 
|---|
| 912 |  | 
|---|
| 913 | /* | 
|---|
| 914 | * Receive CopyData message available from XLOG stream, blocking for | 
|---|
| 915 | * maximum of 'timeout' ms. | 
|---|
| 916 | * | 
|---|
| 917 | * If data was received, returns the length of the data. *buffer is set to | 
|---|
| 918 | * point to a buffer holding the received message. The buffer is only valid | 
|---|
| 919 | * until the next CopyStreamReceive call. | 
|---|
| 920 | * | 
|---|
| 921 | * Returns 0 if no data was available within timeout, or if wait was | 
|---|
| 922 | * interrupted by signal or stop_socket input. | 
|---|
| 923 | * -1 on error. -2 if the server ended the COPY. | 
|---|
| 924 | */ | 
|---|
| 925 | static int | 
|---|
| 926 | CopyStreamReceive(PGconn *conn, long timeout, pgsocket stop_socket, | 
|---|
| 927 | char **buffer) | 
|---|
| 928 | { | 
|---|
| 929 | char	   *copybuf = NULL; | 
|---|
| 930 | int			rawlen; | 
|---|
| 931 |  | 
|---|
| 932 | if (*buffer != NULL) | 
|---|
| 933 | PQfreemem(*buffer); | 
|---|
| 934 | *buffer = NULL; | 
|---|
| 935 |  | 
|---|
| 936 | /* Try to receive a CopyData message */ | 
|---|
| 937 | rawlen = PQgetCopyData(conn, ©buf, 1); | 
|---|
| 938 | if (rawlen == 0) | 
|---|
| 939 | { | 
|---|
| 940 | int			ret; | 
|---|
| 941 |  | 
|---|
| 942 | /* | 
|---|
| 943 | * No data available.  Wait for some to appear, but not longer than | 
|---|
| 944 | * the specified timeout, so that we can ping the server.  Also stop | 
|---|
| 945 | * waiting if input appears on stop_socket. | 
|---|
| 946 | */ | 
|---|
| 947 | ret = CopyStreamPoll(conn, timeout, stop_socket); | 
|---|
| 948 | if (ret <= 0) | 
|---|
| 949 | return ret; | 
|---|
| 950 |  | 
|---|
| 951 | /* Now there is actually data on the socket */ | 
|---|
| 952 | if (PQconsumeInput(conn) == 0) | 
|---|
| 953 | { | 
|---|
| 954 | pg_log_error( "could not receive data from WAL stream: %s", | 
|---|
| 955 | PQerrorMessage(conn)); | 
|---|
| 956 | return -1; | 
|---|
| 957 | } | 
|---|
| 958 |  | 
|---|
| 959 | /* Now that we've consumed some input, try again */ | 
|---|
| 960 | rawlen = PQgetCopyData(conn, ©buf, 1); | 
|---|
| 961 | if (rawlen == 0) | 
|---|
| 962 | return 0; | 
|---|
| 963 | } | 
|---|
| 964 | if (rawlen == -1)			/* end-of-streaming or error */ | 
|---|
| 965 | return -2; | 
|---|
| 966 | if (rawlen == -2) | 
|---|
| 967 | { | 
|---|
| 968 | pg_log_error( "could not read COPY data: %s", PQerrorMessage(conn)); | 
|---|
| 969 | return -1; | 
|---|
| 970 | } | 
|---|
| 971 |  | 
|---|
| 972 | /* Return received messages to caller */ | 
|---|
| 973 | *buffer = copybuf; | 
|---|
| 974 | return rawlen; | 
|---|
| 975 | } | 
|---|
| 976 |  | 
|---|
| 977 | /* | 
|---|
| 978 | * Process the keepalive message. | 
|---|
| 979 | */ | 
|---|
| 980 | static bool | 
|---|
| 981 | ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, | 
|---|
| 982 | XLogRecPtr blockpos, TimestampTz *last_status) | 
|---|
| 983 | { | 
|---|
| 984 | int			pos; | 
|---|
| 985 | bool		replyRequested; | 
|---|
| 986 | TimestampTz now; | 
|---|
| 987 |  | 
|---|
| 988 | /* | 
|---|
| 989 | * Parse the keepalive message, enclosed in the CopyData message. We just | 
|---|
| 990 | * check if the server requested a reply, and ignore the rest. | 
|---|
| 991 | */ | 
|---|
| 992 | pos = 1;					/* skip msgtype 'k' */ | 
|---|
| 993 | pos += 8;					/* skip walEnd */ | 
|---|
| 994 | pos += 8;					/* skip sendTime */ | 
|---|
| 995 |  | 
|---|
| 996 | if (len < pos + 1) | 
|---|
| 997 | { | 
|---|
| 998 | pg_log_error( "streaming header too small: %d", len); | 
|---|
| 999 | return false; | 
|---|
| 1000 | } | 
|---|
| 1001 | replyRequested = copybuf[pos]; | 
|---|
| 1002 |  | 
|---|
| 1003 | /* If the server requested an immediate reply, send one. */ | 
|---|
| 1004 | if (replyRequested && still_sending) | 
|---|
| 1005 | { | 
|---|
| 1006 | if (reportFlushPosition && lastFlushPosition < blockpos && | 
|---|
| 1007 | walfile != NULL) | 
|---|
| 1008 | { | 
|---|
| 1009 | /* | 
|---|
| 1010 | * If a valid flush location needs to be reported, flush the | 
|---|
| 1011 | * current WAL file so that the latest flush location is sent back | 
|---|
| 1012 | * to the server. This is necessary to see whether the last WAL | 
|---|
| 1013 | * data has been successfully replicated or not, at the normal | 
|---|
| 1014 | * shutdown of the server. | 
|---|
| 1015 | */ | 
|---|
| 1016 | if (stream->walmethod->sync(walfile) != 0) | 
|---|
| 1017 | { | 
|---|
| 1018 | pg_log_error( "could not fsync file \"%s\": %s", | 
|---|
| 1019 | current_walfile_name, stream->walmethod->getlasterror()); | 
|---|
| 1020 | return false; | 
|---|
| 1021 | } | 
|---|
| 1022 | lastFlushPosition = blockpos; | 
|---|
| 1023 | } | 
|---|
| 1024 |  | 
|---|
| 1025 | now = feGetCurrentTimestamp(); | 
|---|
| 1026 | if (!sendFeedback(conn, blockpos, now, false)) | 
|---|
| 1027 | return false; | 
|---|
| 1028 | *last_status = now; | 
|---|
| 1029 | } | 
|---|
| 1030 |  | 
|---|
| 1031 | return true; | 
|---|
| 1032 | } | 
|---|
| 1033 |  | 
|---|
| 1034 | /* | 
|---|
| 1035 | * Process XLogData message. | 
|---|
| 1036 | */ | 
|---|
| 1037 | static bool | 
|---|
| 1038 | ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, | 
|---|
| 1039 | XLogRecPtr *blockpos) | 
|---|
| 1040 | { | 
|---|
| 1041 | int			xlogoff; | 
|---|
| 1042 | int			bytes_left; | 
|---|
| 1043 | int			bytes_written; | 
|---|
| 1044 | int			hdr_len; | 
|---|
| 1045 |  | 
|---|
| 1046 | /* | 
|---|
| 1047 | * Once we've decided we don't want to receive any more, just ignore any | 
|---|
| 1048 | * subsequent XLogData messages. | 
|---|
| 1049 | */ | 
|---|
| 1050 | if (!(still_sending)) | 
|---|
| 1051 | return true; | 
|---|
| 1052 |  | 
|---|
| 1053 | /* | 
|---|
| 1054 | * Read the header of the XLogData message, enclosed in the CopyData | 
|---|
| 1055 | * message. We only need the WAL location field (dataStart), the rest of | 
|---|
| 1056 | * the header is ignored. | 
|---|
| 1057 | */ | 
|---|
| 1058 | hdr_len = 1;				/* msgtype 'w' */ | 
|---|
| 1059 | hdr_len += 8;				/* dataStart */ | 
|---|
| 1060 | hdr_len += 8;				/* walEnd */ | 
|---|
| 1061 | hdr_len += 8;				/* sendTime */ | 
|---|
| 1062 | if (len < hdr_len) | 
|---|
| 1063 | { | 
|---|
| 1064 | pg_log_error( "streaming header too small: %d", len); | 
|---|
| 1065 | return false; | 
|---|
| 1066 | } | 
|---|
| 1067 | *blockpos = fe_recvint64(©buf[1]); | 
|---|
| 1068 |  | 
|---|
| 1069 | /* Extract WAL location for this block */ | 
|---|
| 1070 | xlogoff = XLogSegmentOffset(*blockpos, WalSegSz); | 
|---|
| 1071 |  | 
|---|
| 1072 | /* | 
|---|
| 1073 | * Verify that the initial location in the stream matches where we think | 
|---|
| 1074 | * we are. | 
|---|
| 1075 | */ | 
|---|
| 1076 | if (walfile == NULL) | 
|---|
| 1077 | { | 
|---|
| 1078 | /* No file open yet */ | 
|---|
| 1079 | if (xlogoff != 0) | 
|---|
| 1080 | { | 
|---|
| 1081 | pg_log_error( "received write-ahead log record for offset %u with no file open", | 
|---|
| 1082 | xlogoff); | 
|---|
| 1083 | return false; | 
|---|
| 1084 | } | 
|---|
| 1085 | } | 
|---|
| 1086 | else | 
|---|
| 1087 | { | 
|---|
| 1088 | /* More data in existing segment */ | 
|---|
| 1089 | if (stream->walmethod->get_current_pos(walfile) != xlogoff) | 
|---|
| 1090 | { | 
|---|
| 1091 | pg_log_error( "got WAL data offset %08x, expected %08x", | 
|---|
| 1092 | xlogoff, (int) stream->walmethod->get_current_pos(walfile)); | 
|---|
| 1093 | return false; | 
|---|
| 1094 | } | 
|---|
| 1095 | } | 
|---|
| 1096 |  | 
|---|
| 1097 | bytes_left = len - hdr_len; | 
|---|
| 1098 | bytes_written = 0; | 
|---|
| 1099 |  | 
|---|
| 1100 | while (bytes_left) | 
|---|
| 1101 | { | 
|---|
| 1102 | int			bytes_to_write; | 
|---|
| 1103 |  | 
|---|
| 1104 | /* | 
|---|
| 1105 | * If crossing a WAL boundary, only write up until we reach wal | 
|---|
| 1106 | * segment size. | 
|---|
| 1107 | */ | 
|---|
| 1108 | if (xlogoff + bytes_left > WalSegSz) | 
|---|
| 1109 | bytes_to_write = WalSegSz - xlogoff; | 
|---|
| 1110 | else | 
|---|
| 1111 | bytes_to_write = bytes_left; | 
|---|
| 1112 |  | 
|---|
| 1113 | if (walfile == NULL) | 
|---|
| 1114 | { | 
|---|
| 1115 | if (!open_walfile(stream, *blockpos)) | 
|---|
| 1116 | { | 
|---|
| 1117 | /* Error logged by open_walfile */ | 
|---|
| 1118 | return false; | 
|---|
| 1119 | } | 
|---|
| 1120 | } | 
|---|
| 1121 |  | 
|---|
| 1122 | if (stream->walmethod->write(walfile, copybuf + hdr_len + bytes_written, | 
|---|
| 1123 | bytes_to_write) != bytes_to_write) | 
|---|
| 1124 | { | 
|---|
| 1125 | pg_log_error( "could not write %u bytes to WAL file \"%s\": %s", | 
|---|
| 1126 | bytes_to_write, current_walfile_name, | 
|---|
| 1127 | stream->walmethod->getlasterror()); | 
|---|
| 1128 | return false; | 
|---|
| 1129 | } | 
|---|
| 1130 |  | 
|---|
| 1131 | /* Write was successful, advance our position */ | 
|---|
| 1132 | bytes_written += bytes_to_write; | 
|---|
| 1133 | bytes_left -= bytes_to_write; | 
|---|
| 1134 | *blockpos += bytes_to_write; | 
|---|
| 1135 | xlogoff += bytes_to_write; | 
|---|
| 1136 |  | 
|---|
| 1137 | /* Did we reach the end of a WAL segment? */ | 
|---|
| 1138 | if (XLogSegmentOffset(*blockpos, WalSegSz) == 0) | 
|---|
| 1139 | { | 
|---|
| 1140 | if (!close_walfile(stream, *blockpos)) | 
|---|
| 1141 | /* Error message written in close_walfile() */ | 
|---|
| 1142 | return false; | 
|---|
| 1143 |  | 
|---|
| 1144 | xlogoff = 0; | 
|---|
| 1145 |  | 
|---|
| 1146 | if (still_sending && stream->stream_stop(*blockpos, stream->timeline, true)) | 
|---|
| 1147 | { | 
|---|
| 1148 | if (PQputCopyEnd(conn, NULL) <= 0 || PQflush(conn)) | 
|---|
| 1149 | { | 
|---|
| 1150 | pg_log_error( "could not send copy-end packet: %s", | 
|---|
| 1151 | PQerrorMessage(conn)); | 
|---|
| 1152 | return false; | 
|---|
| 1153 | } | 
|---|
| 1154 | still_sending = false; | 
|---|
| 1155 | return true;	/* ignore the rest of this XLogData packet */ | 
|---|
| 1156 | } | 
|---|
| 1157 | } | 
|---|
| 1158 | } | 
|---|
| 1159 | /* No more data left to write, receive next copy packet */ | 
|---|
| 1160 |  | 
|---|
| 1161 | return true; | 
|---|
| 1162 | } | 
|---|
| 1163 |  | 
|---|
| 1164 | /* | 
|---|
| 1165 | * Handle end of the copy stream. | 
|---|
| 1166 | */ | 
|---|
| 1167 | static PGresult * | 
|---|
| 1168 | HandleEndOfCopyStream(PGconn *conn, StreamCtl *stream, char *copybuf, | 
|---|
| 1169 | XLogRecPtr blockpos, XLogRecPtr *stoppos) | 
|---|
| 1170 | { | 
|---|
| 1171 | PGresult   *res = PQgetResult(conn); | 
|---|
| 1172 |  | 
|---|
| 1173 | /* | 
|---|
| 1174 | * The server closed its end of the copy stream.  If we haven't closed | 
|---|
| 1175 | * ours already, we need to do so now, unless the server threw an error, | 
|---|
| 1176 | * in which case we don't. | 
|---|
| 1177 | */ | 
|---|
| 1178 | if (still_sending) | 
|---|
| 1179 | { | 
|---|
| 1180 | if (!close_walfile(stream, blockpos)) | 
|---|
| 1181 | { | 
|---|
| 1182 | /* Error message written in close_walfile() */ | 
|---|
| 1183 | PQclear(res); | 
|---|
| 1184 | return NULL; | 
|---|
| 1185 | } | 
|---|
| 1186 | if (PQresultStatus(res) == PGRES_COPY_IN) | 
|---|
| 1187 | { | 
|---|
| 1188 | if (PQputCopyEnd(conn, NULL) <= 0 || PQflush(conn)) | 
|---|
| 1189 | { | 
|---|
| 1190 | pg_log_error( "could not send copy-end packet: %s", | 
|---|
| 1191 | PQerrorMessage(conn)); | 
|---|
| 1192 | PQclear(res); | 
|---|
| 1193 | return NULL; | 
|---|
| 1194 | } | 
|---|
| 1195 | res = PQgetResult(conn); | 
|---|
| 1196 | } | 
|---|
| 1197 | still_sending = false; | 
|---|
| 1198 | } | 
|---|
| 1199 | if (copybuf != NULL) | 
|---|
| 1200 | PQfreemem(copybuf); | 
|---|
| 1201 | *stoppos = blockpos; | 
|---|
| 1202 | return res; | 
|---|
| 1203 | } | 
|---|
| 1204 |  | 
|---|
| 1205 | /* | 
|---|
| 1206 | * Check if we should continue streaming, or abort at this point. | 
|---|
| 1207 | */ | 
|---|
| 1208 | static bool | 
|---|
| 1209 | CheckCopyStreamStop(PGconn *conn, StreamCtl *stream, XLogRecPtr blockpos, | 
|---|
| 1210 | XLogRecPtr *stoppos) | 
|---|
| 1211 | { | 
|---|
| 1212 | if (still_sending && stream->stream_stop(blockpos, stream->timeline, false)) | 
|---|
| 1213 | { | 
|---|
| 1214 | if (!close_walfile(stream, blockpos)) | 
|---|
| 1215 | { | 
|---|
| 1216 | /* Potential error message is written by close_walfile */ | 
|---|
| 1217 | return false; | 
|---|
| 1218 | } | 
|---|
| 1219 | if (PQputCopyEnd(conn, NULL) <= 0 || PQflush(conn)) | 
|---|
| 1220 | { | 
|---|
| 1221 | pg_log_error( "could not send copy-end packet: %s", | 
|---|
| 1222 | PQerrorMessage(conn)); | 
|---|
| 1223 | return false; | 
|---|
| 1224 | } | 
|---|
| 1225 | still_sending = false; | 
|---|
| 1226 | } | 
|---|
| 1227 |  | 
|---|
| 1228 | return true; | 
|---|
| 1229 | } | 
|---|
| 1230 |  | 
|---|
| 1231 | /* | 
|---|
| 1232 | * Calculate how long send/receive loops should sleep | 
|---|
| 1233 | */ | 
|---|
| 1234 | static long | 
|---|
| 1235 | CalculateCopyStreamSleeptime(TimestampTz now, int standby_message_timeout, | 
|---|
| 1236 | TimestampTz last_status) | 
|---|
| 1237 | { | 
|---|
| 1238 | TimestampTz status_targettime = 0; | 
|---|
| 1239 | long		sleeptime; | 
|---|
| 1240 |  | 
|---|
| 1241 | if (standby_message_timeout && still_sending) | 
|---|
| 1242 | status_targettime = last_status + | 
|---|
| 1243 | (standby_message_timeout - 1) * ((int64) 1000); | 
|---|
| 1244 |  | 
|---|
| 1245 | if (status_targettime > 0) | 
|---|
| 1246 | { | 
|---|
| 1247 | long		secs; | 
|---|
| 1248 | int			usecs; | 
|---|
| 1249 |  | 
|---|
| 1250 | feTimestampDifference(now, | 
|---|
| 1251 | status_targettime, | 
|---|
| 1252 | &secs, | 
|---|
| 1253 | &usecs); | 
|---|
| 1254 | /* Always sleep at least 1 sec */ | 
|---|
| 1255 | if (secs <= 0) | 
|---|
| 1256 | { | 
|---|
| 1257 | secs = 1; | 
|---|
| 1258 | usecs = 0; | 
|---|
| 1259 | } | 
|---|
| 1260 |  | 
|---|
| 1261 | sleeptime = secs * 1000 + usecs / 1000; | 
|---|
| 1262 | } | 
|---|
| 1263 | else | 
|---|
| 1264 | sleeptime = -1; | 
|---|
| 1265 |  | 
|---|
| 1266 | return sleeptime; | 
|---|
| 1267 | } | 
|---|
| 1268 |  | 
|---|