1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * xlogreader.c |
4 | * Generic XLog reading facility |
5 | * |
6 | * Portions Copyright (c) 2013-2019, PostgreSQL Global Development Group |
7 | * |
8 | * IDENTIFICATION |
9 | * src/backend/access/transam/xlogreader.c |
10 | * |
11 | * NOTES |
12 | * See xlogreader.h for more notes on this facility. |
13 | * |
14 | * This file is compiled as both front-end and backend code, so it |
15 | * may not use ereport, server-defined static variables, etc. |
16 | *------------------------------------------------------------------------- |
17 | */ |
18 | #include "postgres.h" |
19 | |
20 | #include "access/transam.h" |
21 | #include "access/xlogrecord.h" |
22 | #include "access/xlog_internal.h" |
23 | #include "access/xlogreader.h" |
24 | #include "catalog/pg_control.h" |
25 | #include "common/pg_lzcompress.h" |
26 | #include "replication/origin.h" |
27 | |
28 | #ifndef FRONTEND |
29 | #include "utils/memutils.h" |
30 | #endif |
31 | |
32 | static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength); |
33 | |
34 | static bool ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr, |
35 | XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess); |
36 | static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record, |
37 | XLogRecPtr recptr); |
38 | static int ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, |
39 | int reqLen); |
40 | static void report_invalid_record(XLogReaderState *state, const char *fmt,...) pg_attribute_printf(2, 3); |
41 | |
42 | static void ResetDecoder(XLogReaderState *state); |
43 | |
44 | /* size of the buffer allocated for error message. */ |
45 | #define MAX_ERRORMSG_LEN 1000 |
46 | |
47 | /* |
48 | * Construct a string in state->errormsg_buf explaining what's wrong with |
49 | * the current record being read. |
50 | */ |
51 | static void |
52 | report_invalid_record(XLogReaderState *state, const char *fmt,...) |
53 | { |
54 | va_list args; |
55 | |
56 | fmt = _(fmt); |
57 | |
58 | va_start(args, fmt); |
59 | vsnprintf(state->errormsg_buf, MAX_ERRORMSG_LEN, fmt, args); |
60 | va_end(args); |
61 | } |
62 | |
63 | /* |
64 | * Allocate and initialize a new XLogReader. |
65 | * |
66 | * Returns NULL if the xlogreader couldn't be allocated. |
67 | */ |
68 | XLogReaderState * |
69 | XLogReaderAllocate(int wal_segment_size, XLogPageReadCB , |
70 | void *private_data) |
71 | { |
72 | XLogReaderState *state; |
73 | |
74 | state = (XLogReaderState *) |
75 | palloc_extended(sizeof(XLogReaderState), |
76 | MCXT_ALLOC_NO_OOM | MCXT_ALLOC_ZERO); |
77 | if (!state) |
78 | return NULL; |
79 | |
80 | state->max_block_id = -1; |
81 | |
82 | /* |
83 | * Permanently allocate readBuf. We do it this way, rather than just |
84 | * making a static array, for two reasons: (1) no need to waste the |
85 | * storage in most instantiations of the backend; (2) a static char array |
86 | * isn't guaranteed to have any particular alignment, whereas |
87 | * palloc_extended() will provide MAXALIGN'd storage. |
88 | */ |
89 | state->readBuf = (char *) palloc_extended(XLOG_BLCKSZ, |
90 | MCXT_ALLOC_NO_OOM); |
91 | if (!state->readBuf) |
92 | { |
93 | pfree(state); |
94 | return NULL; |
95 | } |
96 | |
97 | state->wal_segment_size = wal_segment_size; |
98 | state->read_page = pagereadfunc; |
99 | /* system_identifier initialized to zeroes above */ |
100 | state->private_data = private_data; |
101 | /* ReadRecPtr and EndRecPtr initialized to zeroes above */ |
102 | /* readSegNo, readOff, readLen, readPageTLI initialized to zeroes above */ |
103 | state->errormsg_buf = palloc_extended(MAX_ERRORMSG_LEN + 1, |
104 | MCXT_ALLOC_NO_OOM); |
105 | if (!state->errormsg_buf) |
106 | { |
107 | pfree(state->readBuf); |
108 | pfree(state); |
109 | return NULL; |
110 | } |
111 | state->errormsg_buf[0] = '\0'; |
112 | |
113 | /* |
114 | * Allocate an initial readRecordBuf of minimal size, which can later be |
115 | * enlarged if necessary. |
116 | */ |
117 | if (!allocate_recordbuf(state, 0)) |
118 | { |
119 | pfree(state->errormsg_buf); |
120 | pfree(state->readBuf); |
121 | pfree(state); |
122 | return NULL; |
123 | } |
124 | |
125 | return state; |
126 | } |
127 | |
128 | void |
129 | XLogReaderFree(XLogReaderState *state) |
130 | { |
131 | int block_id; |
132 | |
133 | for (block_id = 0; block_id <= XLR_MAX_BLOCK_ID; block_id++) |
134 | { |
135 | if (state->blocks[block_id].data) |
136 | pfree(state->blocks[block_id].data); |
137 | } |
138 | if (state->main_data) |
139 | pfree(state->main_data); |
140 | |
141 | pfree(state->errormsg_buf); |
142 | if (state->readRecordBuf) |
143 | pfree(state->readRecordBuf); |
144 | pfree(state->readBuf); |
145 | pfree(state); |
146 | } |
147 | |
148 | /* |
149 | * Allocate readRecordBuf to fit a record of at least the given length. |
150 | * Returns true if successful, false if out of memory. |
151 | * |
152 | * readRecordBufSize is set to the new buffer size. |
153 | * |
154 | * To avoid useless small increases, round its size to a multiple of |
155 | * XLOG_BLCKSZ, and make sure it's at least 5*Max(BLCKSZ, XLOG_BLCKSZ) to start |
156 | * with. (That is enough for all "normal" records, but very large commit or |
157 | * abort records might need more space.) |
158 | */ |
159 | static bool |
160 | allocate_recordbuf(XLogReaderState *state, uint32 reclength) |
161 | { |
162 | uint32 newSize = reclength; |
163 | |
164 | newSize += XLOG_BLCKSZ - (newSize % XLOG_BLCKSZ); |
165 | newSize = Max(newSize, 5 * Max(BLCKSZ, XLOG_BLCKSZ)); |
166 | |
167 | #ifndef FRONTEND |
168 | |
169 | /* |
170 | * Note that in much unlucky circumstances, the random data read from a |
171 | * recycled segment can cause this routine to be called with a size |
172 | * causing a hard failure at allocation. For a standby, this would cause |
173 | * the instance to stop suddenly with a hard failure, preventing it to |
174 | * retry fetching WAL from one of its sources which could allow it to move |
175 | * on with replay without a manual restart. If the data comes from a past |
176 | * recycled segment and is still valid, then the allocation may succeed |
177 | * but record checks are going to fail so this would be short-lived. If |
178 | * the allocation fails because of a memory shortage, then this is not a |
179 | * hard failure either per the guarantee given by MCXT_ALLOC_NO_OOM. |
180 | */ |
181 | if (!AllocSizeIsValid(newSize)) |
182 | return false; |
183 | |
184 | #endif |
185 | |
186 | if (state->readRecordBuf) |
187 | pfree(state->readRecordBuf); |
188 | state->readRecordBuf = |
189 | (char *) palloc_extended(newSize, MCXT_ALLOC_NO_OOM); |
190 | if (state->readRecordBuf == NULL) |
191 | { |
192 | state->readRecordBufSize = 0; |
193 | return false; |
194 | } |
195 | state->readRecordBufSize = newSize; |
196 | return true; |
197 | } |
198 | |
199 | /* |
200 | * Attempt to read an XLOG record. |
201 | * |
202 | * If RecPtr is valid, try to read a record at that position. Otherwise |
203 | * try to read a record just after the last one previously read. |
204 | * |
205 | * If the read_page callback fails to read the requested data, NULL is |
206 | * returned. The callback is expected to have reported the error; errormsg |
207 | * is set to NULL. |
208 | * |
209 | * If the reading fails for some other reason, NULL is also returned, and |
210 | * *errormsg is set to a string with details of the failure. |
211 | * |
212 | * The returned pointer (or *errormsg) points to an internal buffer that's |
213 | * valid until the next call to XLogReadRecord. |
214 | */ |
215 | XLogRecord * |
216 | XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg) |
217 | { |
218 | XLogRecord *record; |
219 | XLogRecPtr targetPagePtr; |
220 | bool randAccess; |
221 | uint32 len, |
222 | total_len; |
223 | uint32 targetRecOff; |
224 | uint32 ; |
225 | bool ; |
226 | int readOff; |
227 | |
228 | /* |
229 | * randAccess indicates whether to verify the previous-record pointer of |
230 | * the record we're reading. We only do this if we're reading |
231 | * sequentially, which is what we initially assume. |
232 | */ |
233 | randAccess = false; |
234 | |
235 | /* reset error state */ |
236 | *errormsg = NULL; |
237 | state->errormsg_buf[0] = '\0'; |
238 | |
239 | ResetDecoder(state); |
240 | |
241 | if (RecPtr == InvalidXLogRecPtr) |
242 | { |
243 | /* No explicit start point; read the record after the one we just read */ |
244 | RecPtr = state->EndRecPtr; |
245 | |
246 | if (state->ReadRecPtr == InvalidXLogRecPtr) |
247 | randAccess = true; |
248 | |
249 | /* |
250 | * RecPtr is pointing to end+1 of the previous WAL record. If we're |
251 | * at a page boundary, no more records can fit on the current page. We |
252 | * must skip over the page header, but we can't do that until we've |
253 | * read in the page, since the header size is variable. |
254 | */ |
255 | } |
256 | else |
257 | { |
258 | /* |
259 | * Caller supplied a position to start at. |
260 | * |
261 | * In this case, the passed-in record pointer should already be |
262 | * pointing to a valid record starting position. |
263 | */ |
264 | Assert(XRecOffIsValid(RecPtr)); |
265 | randAccess = true; |
266 | } |
267 | |
268 | state->currRecPtr = RecPtr; |
269 | |
270 | targetPagePtr = RecPtr - (RecPtr % XLOG_BLCKSZ); |
271 | targetRecOff = RecPtr % XLOG_BLCKSZ; |
272 | |
273 | /* |
274 | * Read the page containing the record into state->readBuf. Request enough |
275 | * byte to cover the whole record header, or at least the part of it that |
276 | * fits on the same page. |
277 | */ |
278 | readOff = ReadPageInternal(state, |
279 | targetPagePtr, |
280 | Min(targetRecOff + SizeOfXLogRecord, XLOG_BLCKSZ)); |
281 | if (readOff < 0) |
282 | goto err; |
283 | |
284 | /* |
285 | * ReadPageInternal always returns at least the page header, so we can |
286 | * examine it now. |
287 | */ |
288 | pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf); |
289 | if (targetRecOff == 0) |
290 | { |
291 | /* |
292 | * At page start, so skip over page header. |
293 | */ |
294 | RecPtr += pageHeaderSize; |
295 | targetRecOff = pageHeaderSize; |
296 | } |
297 | else if (targetRecOff < pageHeaderSize) |
298 | { |
299 | report_invalid_record(state, "invalid record offset at %X/%X" , |
300 | (uint32) (RecPtr >> 32), (uint32) RecPtr); |
301 | goto err; |
302 | } |
303 | |
304 | if ((((XLogPageHeader) state->readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) && |
305 | targetRecOff == pageHeaderSize) |
306 | { |
307 | report_invalid_record(state, "contrecord is requested by %X/%X" , |
308 | (uint32) (RecPtr >> 32), (uint32) RecPtr); |
309 | goto err; |
310 | } |
311 | |
312 | /* ReadPageInternal has verified the page header */ |
313 | Assert(pageHeaderSize <= readOff); |
314 | |
315 | /* |
316 | * Read the record length. |
317 | * |
318 | * NB: Even though we use an XLogRecord pointer here, the whole record |
319 | * header might not fit on this page. xl_tot_len is the first field of the |
320 | * struct, so it must be on this page (the records are MAXALIGNed), but we |
321 | * cannot access any other fields until we've verified that we got the |
322 | * whole header. |
323 | */ |
324 | record = (XLogRecord *) (state->readBuf + RecPtr % XLOG_BLCKSZ); |
325 | total_len = record->xl_tot_len; |
326 | |
327 | /* |
328 | * If the whole record header is on this page, validate it immediately. |
329 | * Otherwise do just a basic sanity check on xl_tot_len, and validate the |
330 | * rest of the header after reading it from the next page. The xl_tot_len |
331 | * check is necessary here to ensure that we enter the "Need to reassemble |
332 | * record" code path below; otherwise we might fail to apply |
333 | * ValidXLogRecordHeader at all. |
334 | */ |
335 | if (targetRecOff <= XLOG_BLCKSZ - SizeOfXLogRecord) |
336 | { |
337 | if (!ValidXLogRecordHeader(state, RecPtr, state->ReadRecPtr, record, |
338 | randAccess)) |
339 | goto err; |
340 | gotheader = true; |
341 | } |
342 | else |
343 | { |
344 | /* XXX: more validation should be done here */ |
345 | if (total_len < SizeOfXLogRecord) |
346 | { |
347 | report_invalid_record(state, |
348 | "invalid record length at %X/%X: wanted %u, got %u" , |
349 | (uint32) (RecPtr >> 32), (uint32) RecPtr, |
350 | (uint32) SizeOfXLogRecord, total_len); |
351 | goto err; |
352 | } |
353 | gotheader = false; |
354 | } |
355 | |
356 | len = XLOG_BLCKSZ - RecPtr % XLOG_BLCKSZ; |
357 | if (total_len > len) |
358 | { |
359 | /* Need to reassemble record */ |
360 | char *contdata; |
361 | XLogPageHeader ; |
362 | char *buffer; |
363 | uint32 gotlen; |
364 | |
365 | /* |
366 | * Enlarge readRecordBuf as needed. |
367 | */ |
368 | if (total_len > state->readRecordBufSize && |
369 | !allocate_recordbuf(state, total_len)) |
370 | { |
371 | /* We treat this as a "bogus data" condition */ |
372 | report_invalid_record(state, "record length %u at %X/%X too long" , |
373 | total_len, |
374 | (uint32) (RecPtr >> 32), (uint32) RecPtr); |
375 | goto err; |
376 | } |
377 | |
378 | /* Copy the first fragment of the record from the first page. */ |
379 | memcpy(state->readRecordBuf, |
380 | state->readBuf + RecPtr % XLOG_BLCKSZ, len); |
381 | buffer = state->readRecordBuf + len; |
382 | gotlen = len; |
383 | |
384 | do |
385 | { |
386 | /* Calculate pointer to beginning of next page */ |
387 | targetPagePtr += XLOG_BLCKSZ; |
388 | |
389 | /* Wait for the next page to become available */ |
390 | readOff = ReadPageInternal(state, targetPagePtr, |
391 | Min(total_len - gotlen + SizeOfXLogShortPHD, |
392 | XLOG_BLCKSZ)); |
393 | |
394 | if (readOff < 0) |
395 | goto err; |
396 | |
397 | Assert(SizeOfXLogShortPHD <= readOff); |
398 | |
399 | /* Check that the continuation on next page looks valid */ |
400 | pageHeader = (XLogPageHeader) state->readBuf; |
401 | if (!(pageHeader->xlp_info & XLP_FIRST_IS_CONTRECORD)) |
402 | { |
403 | report_invalid_record(state, |
404 | "there is no contrecord flag at %X/%X" , |
405 | (uint32) (RecPtr >> 32), (uint32) RecPtr); |
406 | goto err; |
407 | } |
408 | |
409 | /* |
410 | * Cross-check that xlp_rem_len agrees with how much of the record |
411 | * we expect there to be left. |
412 | */ |
413 | if (pageHeader->xlp_rem_len == 0 || |
414 | total_len != (pageHeader->xlp_rem_len + gotlen)) |
415 | { |
416 | report_invalid_record(state, |
417 | "invalid contrecord length %u at %X/%X" , |
418 | pageHeader->xlp_rem_len, |
419 | (uint32) (RecPtr >> 32), (uint32) RecPtr); |
420 | goto err; |
421 | } |
422 | |
423 | /* Append the continuation from this page to the buffer */ |
424 | pageHeaderSize = XLogPageHeaderSize(pageHeader); |
425 | |
426 | if (readOff < pageHeaderSize) |
427 | readOff = ReadPageInternal(state, targetPagePtr, |
428 | pageHeaderSize); |
429 | |
430 | Assert(pageHeaderSize <= readOff); |
431 | |
432 | contdata = (char *) state->readBuf + pageHeaderSize; |
433 | len = XLOG_BLCKSZ - pageHeaderSize; |
434 | if (pageHeader->xlp_rem_len < len) |
435 | len = pageHeader->xlp_rem_len; |
436 | |
437 | if (readOff < pageHeaderSize + len) |
438 | readOff = ReadPageInternal(state, targetPagePtr, |
439 | pageHeaderSize + len); |
440 | |
441 | memcpy(buffer, (char *) contdata, len); |
442 | buffer += len; |
443 | gotlen += len; |
444 | |
445 | /* If we just reassembled the record header, validate it. */ |
446 | if (!gotheader) |
447 | { |
448 | record = (XLogRecord *) state->readRecordBuf; |
449 | if (!ValidXLogRecordHeader(state, RecPtr, state->ReadRecPtr, |
450 | record, randAccess)) |
451 | goto err; |
452 | gotheader = true; |
453 | } |
454 | } while (gotlen < total_len); |
455 | |
456 | Assert(gotheader); |
457 | |
458 | record = (XLogRecord *) state->readRecordBuf; |
459 | if (!ValidXLogRecord(state, record, RecPtr)) |
460 | goto err; |
461 | |
462 | pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf); |
463 | state->ReadRecPtr = RecPtr; |
464 | state->EndRecPtr = targetPagePtr + pageHeaderSize |
465 | + MAXALIGN(pageHeader->xlp_rem_len); |
466 | } |
467 | else |
468 | { |
469 | /* Wait for the record data to become available */ |
470 | readOff = ReadPageInternal(state, targetPagePtr, |
471 | Min(targetRecOff + total_len, XLOG_BLCKSZ)); |
472 | if (readOff < 0) |
473 | goto err; |
474 | |
475 | /* Record does not cross a page boundary */ |
476 | if (!ValidXLogRecord(state, record, RecPtr)) |
477 | goto err; |
478 | |
479 | state->EndRecPtr = RecPtr + MAXALIGN(total_len); |
480 | |
481 | state->ReadRecPtr = RecPtr; |
482 | } |
483 | |
484 | /* |
485 | * Special processing if it's an XLOG SWITCH record |
486 | */ |
487 | if (record->xl_rmid == RM_XLOG_ID && |
488 | (record->xl_info & ~XLR_INFO_MASK) == XLOG_SWITCH) |
489 | { |
490 | /* Pretend it extends to end of segment */ |
491 | state->EndRecPtr += state->wal_segment_size - 1; |
492 | state->EndRecPtr -= XLogSegmentOffset(state->EndRecPtr, state->wal_segment_size); |
493 | } |
494 | |
495 | if (DecodeXLogRecord(state, record, errormsg)) |
496 | return record; |
497 | else |
498 | return NULL; |
499 | |
500 | err: |
501 | |
502 | /* |
503 | * Invalidate the read state. We might read from a different source after |
504 | * failure. |
505 | */ |
506 | XLogReaderInvalReadState(state); |
507 | |
508 | if (state->errormsg_buf[0] != '\0') |
509 | *errormsg = state->errormsg_buf; |
510 | |
511 | return NULL; |
512 | } |
513 | |
514 | /* |
515 | * Read a single xlog page including at least [pageptr, reqLen] of valid data |
516 | * via the read_page() callback. |
517 | * |
518 | * Returns -1 if the required page cannot be read for some reason; errormsg_buf |
519 | * is set in that case (unless the error occurs in the read_page callback). |
520 | * |
521 | * We fetch the page from a reader-local cache if we know we have the required |
522 | * data and if there hasn't been any error since caching the data. |
523 | */ |
524 | static int |
525 | ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) |
526 | { |
527 | int readLen; |
528 | uint32 targetPageOff; |
529 | XLogSegNo targetSegNo; |
530 | XLogPageHeader hdr; |
531 | |
532 | Assert((pageptr % XLOG_BLCKSZ) == 0); |
533 | |
534 | XLByteToSeg(pageptr, targetSegNo, state->wal_segment_size); |
535 | targetPageOff = XLogSegmentOffset(pageptr, state->wal_segment_size); |
536 | |
537 | /* check whether we have all the requested data already */ |
538 | if (targetSegNo == state->readSegNo && targetPageOff == state->readOff && |
539 | reqLen <= state->readLen) |
540 | return state->readLen; |
541 | |
542 | /* |
543 | * Data is not in our buffer. |
544 | * |
545 | * Every time we actually read the page, even if we looked at parts of it |
546 | * before, we need to do verification as the read_page callback might now |
547 | * be rereading data from a different source. |
548 | * |
549 | * Whenever switching to a new WAL segment, we read the first page of the |
550 | * file and validate its header, even if that's not where the target |
551 | * record is. This is so that we can check the additional identification |
552 | * info that is present in the first page's "long" header. |
553 | */ |
554 | if (targetSegNo != state->readSegNo && targetPageOff != 0) |
555 | { |
556 | XLogRecPtr targetSegmentPtr = pageptr - targetPageOff; |
557 | |
558 | readLen = state->read_page(state, targetSegmentPtr, XLOG_BLCKSZ, |
559 | state->currRecPtr, |
560 | state->readBuf, &state->readPageTLI); |
561 | if (readLen < 0) |
562 | goto err; |
563 | |
564 | /* we can be sure to have enough WAL available, we scrolled back */ |
565 | Assert(readLen == XLOG_BLCKSZ); |
566 | |
567 | if (!XLogReaderValidatePageHeader(state, targetSegmentPtr, |
568 | state->readBuf)) |
569 | goto err; |
570 | } |
571 | |
572 | /* |
573 | * First, read the requested data length, but at least a short page header |
574 | * so that we can validate it. |
575 | */ |
576 | readLen = state->read_page(state, pageptr, Max(reqLen, SizeOfXLogShortPHD), |
577 | state->currRecPtr, |
578 | state->readBuf, &state->readPageTLI); |
579 | if (readLen < 0) |
580 | goto err; |
581 | |
582 | Assert(readLen <= XLOG_BLCKSZ); |
583 | |
584 | /* Do we have enough data to check the header length? */ |
585 | if (readLen <= SizeOfXLogShortPHD) |
586 | goto err; |
587 | |
588 | Assert(readLen >= reqLen); |
589 | |
590 | hdr = (XLogPageHeader) state->readBuf; |
591 | |
592 | /* still not enough */ |
593 | if (readLen < XLogPageHeaderSize(hdr)) |
594 | { |
595 | readLen = state->read_page(state, pageptr, XLogPageHeaderSize(hdr), |
596 | state->currRecPtr, |
597 | state->readBuf, &state->readPageTLI); |
598 | if (readLen < 0) |
599 | goto err; |
600 | } |
601 | |
602 | /* |
603 | * Now that we know we have the full header, validate it. |
604 | */ |
605 | if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr)) |
606 | goto err; |
607 | |
608 | /* update read state information */ |
609 | state->readSegNo = targetSegNo; |
610 | state->readOff = targetPageOff; |
611 | state->readLen = readLen; |
612 | |
613 | return readLen; |
614 | |
615 | err: |
616 | XLogReaderInvalReadState(state); |
617 | return -1; |
618 | } |
619 | |
620 | /* |
621 | * Invalidate the xlogreader's read state to force a re-read. |
622 | */ |
623 | void |
624 | XLogReaderInvalReadState(XLogReaderState *state) |
625 | { |
626 | state->readSegNo = 0; |
627 | state->readOff = 0; |
628 | state->readLen = 0; |
629 | } |
630 | |
631 | /* |
632 | * Validate an XLOG record header. |
633 | * |
634 | * This is just a convenience subroutine to avoid duplicated code in |
635 | * XLogReadRecord. It's not intended for use from anywhere else. |
636 | */ |
637 | static bool |
638 | (XLogReaderState *state, XLogRecPtr RecPtr, |
639 | XLogRecPtr PrevRecPtr, XLogRecord *record, |
640 | bool randAccess) |
641 | { |
642 | if (record->xl_tot_len < SizeOfXLogRecord) |
643 | { |
644 | report_invalid_record(state, |
645 | "invalid record length at %X/%X: wanted %u, got %u" , |
646 | (uint32) (RecPtr >> 32), (uint32) RecPtr, |
647 | (uint32) SizeOfXLogRecord, record->xl_tot_len); |
648 | return false; |
649 | } |
650 | if (record->xl_rmid > RM_MAX_ID) |
651 | { |
652 | report_invalid_record(state, |
653 | "invalid resource manager ID %u at %X/%X" , |
654 | record->xl_rmid, (uint32) (RecPtr >> 32), |
655 | (uint32) RecPtr); |
656 | return false; |
657 | } |
658 | if (randAccess) |
659 | { |
660 | /* |
661 | * We can't exactly verify the prev-link, but surely it should be less |
662 | * than the record's own address. |
663 | */ |
664 | if (!(record->xl_prev < RecPtr)) |
665 | { |
666 | report_invalid_record(state, |
667 | "record with incorrect prev-link %X/%X at %X/%X" , |
668 | (uint32) (record->xl_prev >> 32), |
669 | (uint32) record->xl_prev, |
670 | (uint32) (RecPtr >> 32), (uint32) RecPtr); |
671 | return false; |
672 | } |
673 | } |
674 | else |
675 | { |
676 | /* |
677 | * Record's prev-link should exactly match our previous location. This |
678 | * check guards against torn WAL pages where a stale but valid-looking |
679 | * WAL record starts on a sector boundary. |
680 | */ |
681 | if (record->xl_prev != PrevRecPtr) |
682 | { |
683 | report_invalid_record(state, |
684 | "record with incorrect prev-link %X/%X at %X/%X" , |
685 | (uint32) (record->xl_prev >> 32), |
686 | (uint32) record->xl_prev, |
687 | (uint32) (RecPtr >> 32), (uint32) RecPtr); |
688 | return false; |
689 | } |
690 | } |
691 | |
692 | return true; |
693 | } |
694 | |
695 | |
696 | /* |
697 | * CRC-check an XLOG record. We do not believe the contents of an XLOG |
698 | * record (other than to the minimal extent of computing the amount of |
699 | * data to read in) until we've checked the CRCs. |
700 | * |
701 | * We assume all of the record (that is, xl_tot_len bytes) has been read |
702 | * into memory at *record. Also, ValidXLogRecordHeader() has accepted the |
703 | * record's header, which means in particular that xl_tot_len is at least |
704 | * SizeOfXlogRecord. |
705 | */ |
706 | static bool |
707 | ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr) |
708 | { |
709 | pg_crc32c crc; |
710 | |
711 | /* Calculate the CRC */ |
712 | INIT_CRC32C(crc); |
713 | COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord); |
714 | /* include the record header last */ |
715 | COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc)); |
716 | FIN_CRC32C(crc); |
717 | |
718 | if (!EQ_CRC32C(record->xl_crc, crc)) |
719 | { |
720 | report_invalid_record(state, |
721 | "incorrect resource manager data checksum in record at %X/%X" , |
722 | (uint32) (recptr >> 32), (uint32) recptr); |
723 | return false; |
724 | } |
725 | |
726 | return true; |
727 | } |
728 | |
729 | /* |
730 | * Validate a page header. |
731 | * |
732 | * Check if 'phdr' is valid as the header of the XLog page at position |
733 | * 'recptr'. |
734 | */ |
735 | bool |
736 | (XLogReaderState *state, XLogRecPtr recptr, |
737 | char *phdr) |
738 | { |
739 | XLogRecPtr recaddr; |
740 | XLogSegNo segno; |
741 | int32 offset; |
742 | XLogPageHeader hdr = (XLogPageHeader) phdr; |
743 | |
744 | Assert((recptr % XLOG_BLCKSZ) == 0); |
745 | |
746 | XLByteToSeg(recptr, segno, state->wal_segment_size); |
747 | offset = XLogSegmentOffset(recptr, state->wal_segment_size); |
748 | |
749 | XLogSegNoOffsetToRecPtr(segno, offset, state->wal_segment_size, recaddr); |
750 | |
751 | if (hdr->xlp_magic != XLOG_PAGE_MAGIC) |
752 | { |
753 | char fname[MAXFNAMELEN]; |
754 | |
755 | XLogFileName(fname, state->readPageTLI, segno, state->wal_segment_size); |
756 | |
757 | report_invalid_record(state, |
758 | "invalid magic number %04X in log segment %s, offset %u" , |
759 | hdr->xlp_magic, |
760 | fname, |
761 | offset); |
762 | return false; |
763 | } |
764 | |
765 | if ((hdr->xlp_info & ~XLP_ALL_FLAGS) != 0) |
766 | { |
767 | char fname[MAXFNAMELEN]; |
768 | |
769 | XLogFileName(fname, state->readPageTLI, segno, state->wal_segment_size); |
770 | |
771 | report_invalid_record(state, |
772 | "invalid info bits %04X in log segment %s, offset %u" , |
773 | hdr->xlp_info, |
774 | fname, |
775 | offset); |
776 | return false; |
777 | } |
778 | |
779 | if (hdr->xlp_info & XLP_LONG_HEADER) |
780 | { |
781 | XLogLongPageHeader longhdr = (XLogLongPageHeader) hdr; |
782 | |
783 | if (state->system_identifier && |
784 | longhdr->xlp_sysid != state->system_identifier) |
785 | { |
786 | char fhdrident_str[32]; |
787 | char sysident_str[32]; |
788 | |
789 | /* |
790 | * Format sysids separately to keep platform-dependent format code |
791 | * out of the translatable message string. |
792 | */ |
793 | snprintf(fhdrident_str, sizeof(fhdrident_str), UINT64_FORMAT, |
794 | longhdr->xlp_sysid); |
795 | snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT, |
796 | state->system_identifier); |
797 | report_invalid_record(state, |
798 | "WAL file is from different database system: WAL file database system identifier is %s, pg_control database system identifier is %s" , |
799 | fhdrident_str, sysident_str); |
800 | return false; |
801 | } |
802 | else if (longhdr->xlp_seg_size != state->wal_segment_size) |
803 | { |
804 | report_invalid_record(state, |
805 | "WAL file is from different database system: incorrect segment size in page header" ); |
806 | return false; |
807 | } |
808 | else if (longhdr->xlp_xlog_blcksz != XLOG_BLCKSZ) |
809 | { |
810 | report_invalid_record(state, |
811 | "WAL file is from different database system: incorrect XLOG_BLCKSZ in page header" ); |
812 | return false; |
813 | } |
814 | } |
815 | else if (offset == 0) |
816 | { |
817 | char fname[MAXFNAMELEN]; |
818 | |
819 | XLogFileName(fname, state->readPageTLI, segno, state->wal_segment_size); |
820 | |
821 | /* hmm, first page of file doesn't have a long header? */ |
822 | report_invalid_record(state, |
823 | "invalid info bits %04X in log segment %s, offset %u" , |
824 | hdr->xlp_info, |
825 | fname, |
826 | offset); |
827 | return false; |
828 | } |
829 | |
830 | /* |
831 | * Check that the address on the page agrees with what we expected. This |
832 | * check typically fails when an old WAL segment is recycled, and hasn't |
833 | * yet been overwritten with new data yet. |
834 | */ |
835 | if (hdr->xlp_pageaddr != recaddr) |
836 | { |
837 | char fname[MAXFNAMELEN]; |
838 | |
839 | XLogFileName(fname, state->readPageTLI, segno, state->wal_segment_size); |
840 | |
841 | report_invalid_record(state, |
842 | "unexpected pageaddr %X/%X in log segment %s, offset %u" , |
843 | (uint32) (hdr->xlp_pageaddr >> 32), (uint32) hdr->xlp_pageaddr, |
844 | fname, |
845 | offset); |
846 | return false; |
847 | } |
848 | |
849 | /* |
850 | * Since child timelines are always assigned a TLI greater than their |
851 | * immediate parent's TLI, we should never see TLI go backwards across |
852 | * successive pages of a consistent WAL sequence. |
853 | * |
854 | * Sometimes we re-read a segment that's already been (partially) read. So |
855 | * we only verify TLIs for pages that are later than the last remembered |
856 | * LSN. |
857 | */ |
858 | if (recptr > state->latestPagePtr) |
859 | { |
860 | if (hdr->xlp_tli < state->latestPageTLI) |
861 | { |
862 | char fname[MAXFNAMELEN]; |
863 | |
864 | XLogFileName(fname, state->readPageTLI, segno, state->wal_segment_size); |
865 | |
866 | report_invalid_record(state, |
867 | "out-of-sequence timeline ID %u (after %u) in log segment %s, offset %u" , |
868 | hdr->xlp_tli, |
869 | state->latestPageTLI, |
870 | fname, |
871 | offset); |
872 | return false; |
873 | } |
874 | } |
875 | state->latestPagePtr = recptr; |
876 | state->latestPageTLI = hdr->xlp_tli; |
877 | |
878 | return true; |
879 | } |
880 | |
881 | #ifdef FRONTEND |
882 | /* |
883 | * Functions that are currently not needed in the backend, but are better |
884 | * implemented inside xlogreader.c because of the internal facilities available |
885 | * here. |
886 | */ |
887 | |
888 | /* |
889 | * Find the first record with an lsn >= RecPtr. |
890 | * |
891 | * Useful for checking whether RecPtr is a valid xlog address for reading, and |
892 | * to find the first valid address after some address when dumping records for |
893 | * debugging purposes. |
894 | */ |
895 | XLogRecPtr |
896 | XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr) |
897 | { |
898 | XLogReaderState saved_state = *state; |
899 | XLogRecPtr tmpRecPtr; |
900 | XLogRecPtr found = InvalidXLogRecPtr; |
901 | XLogPageHeader header; |
902 | char *errormsg; |
903 | |
904 | Assert(!XLogRecPtrIsInvalid(RecPtr)); |
905 | |
906 | /* |
907 | * skip over potential continuation data, keeping in mind that it may span |
908 | * multiple pages |
909 | */ |
910 | tmpRecPtr = RecPtr; |
911 | while (true) |
912 | { |
913 | XLogRecPtr targetPagePtr; |
914 | int targetRecOff; |
915 | uint32 pageHeaderSize; |
916 | int readLen; |
917 | |
918 | /* |
919 | * Compute targetRecOff. It should typically be equal or greater than |
920 | * short page-header since a valid record can't start anywhere before |
921 | * that, except when caller has explicitly specified the offset that |
922 | * falls somewhere there or when we are skipping multi-page |
923 | * continuation record. It doesn't matter though because |
924 | * ReadPageInternal() is prepared to handle that and will read at |
925 | * least short page-header worth of data |
926 | */ |
927 | targetRecOff = tmpRecPtr % XLOG_BLCKSZ; |
928 | |
929 | /* scroll back to page boundary */ |
930 | targetPagePtr = tmpRecPtr - targetRecOff; |
931 | |
932 | /* Read the page containing the record */ |
933 | readLen = ReadPageInternal(state, targetPagePtr, targetRecOff); |
934 | if (readLen < 0) |
935 | goto err; |
936 | |
937 | header = (XLogPageHeader) state->readBuf; |
938 | |
939 | pageHeaderSize = XLogPageHeaderSize(header); |
940 | |
941 | /* make sure we have enough data for the page header */ |
942 | readLen = ReadPageInternal(state, targetPagePtr, pageHeaderSize); |
943 | if (readLen < 0) |
944 | goto err; |
945 | |
946 | /* skip over potential continuation data */ |
947 | if (header->xlp_info & XLP_FIRST_IS_CONTRECORD) |
948 | { |
949 | /* |
950 | * If the length of the remaining continuation data is more than |
951 | * what can fit in this page, the continuation record crosses over |
952 | * this page. Read the next page and try again. xlp_rem_len in the |
953 | * next page header will contain the remaining length of the |
954 | * continuation data |
955 | * |
956 | * Note that record headers are MAXALIGN'ed |
957 | */ |
958 | if (MAXALIGN(header->xlp_rem_len) > (XLOG_BLCKSZ - pageHeaderSize)) |
959 | tmpRecPtr = targetPagePtr + XLOG_BLCKSZ; |
960 | else |
961 | { |
962 | /* |
963 | * The previous continuation record ends in this page. Set |
964 | * tmpRecPtr to point to the first valid record |
965 | */ |
966 | tmpRecPtr = targetPagePtr + pageHeaderSize |
967 | + MAXALIGN(header->xlp_rem_len); |
968 | break; |
969 | } |
970 | } |
971 | else |
972 | { |
973 | tmpRecPtr = targetPagePtr + pageHeaderSize; |
974 | break; |
975 | } |
976 | } |
977 | |
978 | /* |
979 | * we know now that tmpRecPtr is an address pointing to a valid XLogRecord |
980 | * because either we're at the first record after the beginning of a page |
981 | * or we just jumped over the remaining data of a continuation. |
982 | */ |
983 | while (XLogReadRecord(state, tmpRecPtr, &errormsg) != NULL) |
984 | { |
985 | /* continue after the record */ |
986 | tmpRecPtr = InvalidXLogRecPtr; |
987 | |
988 | /* past the record we've found, break out */ |
989 | if (RecPtr <= state->ReadRecPtr) |
990 | { |
991 | found = state->ReadRecPtr; |
992 | goto out; |
993 | } |
994 | } |
995 | |
996 | err: |
997 | out: |
998 | /* Reset state to what we had before finding the record */ |
999 | state->ReadRecPtr = saved_state.ReadRecPtr; |
1000 | state->EndRecPtr = saved_state.EndRecPtr; |
1001 | XLogReaderInvalReadState(state); |
1002 | |
1003 | return found; |
1004 | } |
1005 | |
1006 | #endif /* FRONTEND */ |
1007 | |
1008 | |
1009 | /* ---------------------------------------- |
1010 | * Functions for decoding the data and block references in a record. |
1011 | * ---------------------------------------- |
1012 | */ |
1013 | |
1014 | /* private function to reset the state between records */ |
1015 | static void |
1016 | ResetDecoder(XLogReaderState *state) |
1017 | { |
1018 | int block_id; |
1019 | |
1020 | state->decoded_record = NULL; |
1021 | |
1022 | state->main_data_len = 0; |
1023 | |
1024 | for (block_id = 0; block_id <= state->max_block_id; block_id++) |
1025 | { |
1026 | state->blocks[block_id].in_use = false; |
1027 | state->blocks[block_id].has_image = false; |
1028 | state->blocks[block_id].has_data = false; |
1029 | state->blocks[block_id].apply_image = false; |
1030 | } |
1031 | state->max_block_id = -1; |
1032 | } |
1033 | |
1034 | /* |
1035 | * Decode the previously read record. |
1036 | * |
1037 | * On error, a human-readable error message is returned in *errormsg, and |
1038 | * the return value is false. |
1039 | */ |
1040 | bool |
1041 | DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg) |
1042 | { |
1043 | /* |
1044 | * read next _size bytes from record buffer, but check for overrun first. |
1045 | */ |
1046 | #define (_dst, _size) \ |
1047 | do { \ |
1048 | if (remaining < _size) \ |
1049 | goto shortdata_err; \ |
1050 | memcpy(_dst, ptr, _size); \ |
1051 | ptr += _size; \ |
1052 | remaining -= _size; \ |
1053 | } while(0) |
1054 | |
1055 | char *ptr; |
1056 | uint32 remaining; |
1057 | uint32 datatotal; |
1058 | RelFileNode *rnode = NULL; |
1059 | uint8 block_id; |
1060 | |
1061 | ResetDecoder(state); |
1062 | |
1063 | state->decoded_record = record; |
1064 | state->record_origin = InvalidRepOriginId; |
1065 | |
1066 | ptr = (char *) record; |
1067 | ptr += SizeOfXLogRecord; |
1068 | remaining = record->xl_tot_len - SizeOfXLogRecord; |
1069 | |
1070 | /* Decode the headers */ |
1071 | datatotal = 0; |
1072 | while (remaining > datatotal) |
1073 | { |
1074 | COPY_HEADER_FIELD(&block_id, sizeof(uint8)); |
1075 | |
1076 | if (block_id == XLR_BLOCK_ID_DATA_SHORT) |
1077 | { |
1078 | /* XLogRecordDataHeaderShort */ |
1079 | uint8 main_data_len; |
1080 | |
1081 | COPY_HEADER_FIELD(&main_data_len, sizeof(uint8)); |
1082 | |
1083 | state->main_data_len = main_data_len; |
1084 | datatotal += main_data_len; |
1085 | break; /* by convention, the main data fragment is |
1086 | * always last */ |
1087 | } |
1088 | else if (block_id == XLR_BLOCK_ID_DATA_LONG) |
1089 | { |
1090 | /* XLogRecordDataHeaderLong */ |
1091 | uint32 main_data_len; |
1092 | |
1093 | COPY_HEADER_FIELD(&main_data_len, sizeof(uint32)); |
1094 | state->main_data_len = main_data_len; |
1095 | datatotal += main_data_len; |
1096 | break; /* by convention, the main data fragment is |
1097 | * always last */ |
1098 | } |
1099 | else if (block_id == XLR_BLOCK_ID_ORIGIN) |
1100 | { |
1101 | COPY_HEADER_FIELD(&state->record_origin, sizeof(RepOriginId)); |
1102 | } |
1103 | else if (block_id <= XLR_MAX_BLOCK_ID) |
1104 | { |
1105 | /* XLogRecordBlockHeader */ |
1106 | DecodedBkpBlock *blk; |
1107 | uint8 fork_flags; |
1108 | |
1109 | if (block_id <= state->max_block_id) |
1110 | { |
1111 | report_invalid_record(state, |
1112 | "out-of-order block_id %u at %X/%X" , |
1113 | block_id, |
1114 | (uint32) (state->ReadRecPtr >> 32), |
1115 | (uint32) state->ReadRecPtr); |
1116 | goto err; |
1117 | } |
1118 | state->max_block_id = block_id; |
1119 | |
1120 | blk = &state->blocks[block_id]; |
1121 | blk->in_use = true; |
1122 | blk->apply_image = false; |
1123 | |
1124 | COPY_HEADER_FIELD(&fork_flags, sizeof(uint8)); |
1125 | blk->forknum = fork_flags & BKPBLOCK_FORK_MASK; |
1126 | blk->flags = fork_flags; |
1127 | blk->has_image = ((fork_flags & BKPBLOCK_HAS_IMAGE) != 0); |
1128 | blk->has_data = ((fork_flags & BKPBLOCK_HAS_DATA) != 0); |
1129 | |
1130 | COPY_HEADER_FIELD(&blk->data_len, sizeof(uint16)); |
1131 | /* cross-check that the HAS_DATA flag is set iff data_length > 0 */ |
1132 | if (blk->has_data && blk->data_len == 0) |
1133 | { |
1134 | report_invalid_record(state, |
1135 | "BKPBLOCK_HAS_DATA set, but no data included at %X/%X" , |
1136 | (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); |
1137 | goto err; |
1138 | } |
1139 | if (!blk->has_data && blk->data_len != 0) |
1140 | { |
1141 | report_invalid_record(state, |
1142 | "BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%X" , |
1143 | (unsigned int) blk->data_len, |
1144 | (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); |
1145 | goto err; |
1146 | } |
1147 | datatotal += blk->data_len; |
1148 | |
1149 | if (blk->has_image) |
1150 | { |
1151 | COPY_HEADER_FIELD(&blk->bimg_len, sizeof(uint16)); |
1152 | COPY_HEADER_FIELD(&blk->hole_offset, sizeof(uint16)); |
1153 | COPY_HEADER_FIELD(&blk->bimg_info, sizeof(uint8)); |
1154 | |
1155 | blk->apply_image = ((blk->bimg_info & BKPIMAGE_APPLY) != 0); |
1156 | |
1157 | if (blk->bimg_info & BKPIMAGE_IS_COMPRESSED) |
1158 | { |
1159 | if (blk->bimg_info & BKPIMAGE_HAS_HOLE) |
1160 | COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16)); |
1161 | else |
1162 | blk->hole_length = 0; |
1163 | } |
1164 | else |
1165 | blk->hole_length = BLCKSZ - blk->bimg_len; |
1166 | datatotal += blk->bimg_len; |
1167 | |
1168 | /* |
1169 | * cross-check that hole_offset > 0, hole_length > 0 and |
1170 | * bimg_len < BLCKSZ if the HAS_HOLE flag is set. |
1171 | */ |
1172 | if ((blk->bimg_info & BKPIMAGE_HAS_HOLE) && |
1173 | (blk->hole_offset == 0 || |
1174 | blk->hole_length == 0 || |
1175 | blk->bimg_len == BLCKSZ)) |
1176 | { |
1177 | report_invalid_record(state, |
1178 | "BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%X" , |
1179 | (unsigned int) blk->hole_offset, |
1180 | (unsigned int) blk->hole_length, |
1181 | (unsigned int) blk->bimg_len, |
1182 | (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); |
1183 | goto err; |
1184 | } |
1185 | |
1186 | /* |
1187 | * cross-check that hole_offset == 0 and hole_length == 0 if |
1188 | * the HAS_HOLE flag is not set. |
1189 | */ |
1190 | if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) && |
1191 | (blk->hole_offset != 0 || blk->hole_length != 0)) |
1192 | { |
1193 | report_invalid_record(state, |
1194 | "BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%X" , |
1195 | (unsigned int) blk->hole_offset, |
1196 | (unsigned int) blk->hole_length, |
1197 | (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); |
1198 | goto err; |
1199 | } |
1200 | |
1201 | /* |
1202 | * cross-check that bimg_len < BLCKSZ if the IS_COMPRESSED |
1203 | * flag is set. |
1204 | */ |
1205 | if ((blk->bimg_info & BKPIMAGE_IS_COMPRESSED) && |
1206 | blk->bimg_len == BLCKSZ) |
1207 | { |
1208 | report_invalid_record(state, |
1209 | "BKPIMAGE_IS_COMPRESSED set, but block image length %u at %X/%X" , |
1210 | (unsigned int) blk->bimg_len, |
1211 | (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); |
1212 | goto err; |
1213 | } |
1214 | |
1215 | /* |
1216 | * cross-check that bimg_len = BLCKSZ if neither HAS_HOLE nor |
1217 | * IS_COMPRESSED flag is set. |
1218 | */ |
1219 | if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) && |
1220 | !(blk->bimg_info & BKPIMAGE_IS_COMPRESSED) && |
1221 | blk->bimg_len != BLCKSZ) |
1222 | { |
1223 | report_invalid_record(state, |
1224 | "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_IS_COMPRESSED set, but block image length is %u at %X/%X" , |
1225 | (unsigned int) blk->data_len, |
1226 | (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); |
1227 | goto err; |
1228 | } |
1229 | } |
1230 | if (!(fork_flags & BKPBLOCK_SAME_REL)) |
1231 | { |
1232 | COPY_HEADER_FIELD(&blk->rnode, sizeof(RelFileNode)); |
1233 | rnode = &blk->rnode; |
1234 | } |
1235 | else |
1236 | { |
1237 | if (rnode == NULL) |
1238 | { |
1239 | report_invalid_record(state, |
1240 | "BKPBLOCK_SAME_REL set but no previous rel at %X/%X" , |
1241 | (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); |
1242 | goto err; |
1243 | } |
1244 | |
1245 | blk->rnode = *rnode; |
1246 | } |
1247 | COPY_HEADER_FIELD(&blk->blkno, sizeof(BlockNumber)); |
1248 | } |
1249 | else |
1250 | { |
1251 | report_invalid_record(state, |
1252 | "invalid block_id %u at %X/%X" , |
1253 | block_id, |
1254 | (uint32) (state->ReadRecPtr >> 32), |
1255 | (uint32) state->ReadRecPtr); |
1256 | goto err; |
1257 | } |
1258 | } |
1259 | |
1260 | if (remaining != datatotal) |
1261 | goto shortdata_err; |
1262 | |
1263 | /* |
1264 | * Ok, we've parsed the fragment headers, and verified that the total |
1265 | * length of the payload in the fragments is equal to the amount of data |
1266 | * left. Copy the data of each fragment to a separate buffer. |
1267 | * |
1268 | * We could just set up pointers into readRecordBuf, but we want to align |
1269 | * the data for the convenience of the callers. Backup images are not |
1270 | * copied, however; they don't need alignment. |
1271 | */ |
1272 | |
1273 | /* block data first */ |
1274 | for (block_id = 0; block_id <= state->max_block_id; block_id++) |
1275 | { |
1276 | DecodedBkpBlock *blk = &state->blocks[block_id]; |
1277 | |
1278 | if (!blk->in_use) |
1279 | continue; |
1280 | |
1281 | Assert(blk->has_image || !blk->apply_image); |
1282 | |
1283 | if (blk->has_image) |
1284 | { |
1285 | blk->bkp_image = ptr; |
1286 | ptr += blk->bimg_len; |
1287 | } |
1288 | if (blk->has_data) |
1289 | { |
1290 | if (!blk->data || blk->data_len > blk->data_bufsz) |
1291 | { |
1292 | if (blk->data) |
1293 | pfree(blk->data); |
1294 | |
1295 | /* |
1296 | * Force the initial request to be BLCKSZ so that we don't |
1297 | * waste time with lots of trips through this stanza as a |
1298 | * result of WAL compression. |
1299 | */ |
1300 | blk->data_bufsz = MAXALIGN(Max(blk->data_len, BLCKSZ)); |
1301 | blk->data = palloc(blk->data_bufsz); |
1302 | } |
1303 | memcpy(blk->data, ptr, blk->data_len); |
1304 | ptr += blk->data_len; |
1305 | } |
1306 | } |
1307 | |
1308 | /* and finally, the main data */ |
1309 | if (state->main_data_len > 0) |
1310 | { |
1311 | if (!state->main_data || state->main_data_len > state->main_data_bufsz) |
1312 | { |
1313 | if (state->main_data) |
1314 | pfree(state->main_data); |
1315 | |
1316 | /* |
1317 | * main_data_bufsz must be MAXALIGN'ed. In many xlog record |
1318 | * types, we omit trailing struct padding on-disk to save a few |
1319 | * bytes; but compilers may generate accesses to the xlog struct |
1320 | * that assume that padding bytes are present. If the palloc |
1321 | * request is not large enough to include such padding bytes then |
1322 | * we'll get valgrind complaints due to otherwise-harmless fetches |
1323 | * of the padding bytes. |
1324 | * |
1325 | * In addition, force the initial request to be reasonably large |
1326 | * so that we don't waste time with lots of trips through this |
1327 | * stanza. BLCKSZ / 2 seems like a good compromise choice. |
1328 | */ |
1329 | state->main_data_bufsz = MAXALIGN(Max(state->main_data_len, |
1330 | BLCKSZ / 2)); |
1331 | state->main_data = palloc(state->main_data_bufsz); |
1332 | } |
1333 | memcpy(state->main_data, ptr, state->main_data_len); |
1334 | ptr += state->main_data_len; |
1335 | } |
1336 | |
1337 | return true; |
1338 | |
1339 | shortdata_err: |
1340 | report_invalid_record(state, |
1341 | "record with invalid length at %X/%X" , |
1342 | (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); |
1343 | err: |
1344 | *errormsg = state->errormsg_buf; |
1345 | |
1346 | return false; |
1347 | } |
1348 | |
1349 | /* |
1350 | * Returns information about the block that a block reference refers to. |
1351 | * |
1352 | * If the WAL record contains a block reference with the given ID, *rnode, |
1353 | * *forknum, and *blknum are filled in (if not NULL), and returns true. |
1354 | * Otherwise returns false. |
1355 | */ |
1356 | bool |
1357 | XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, |
1358 | RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum) |
1359 | { |
1360 | DecodedBkpBlock *bkpb; |
1361 | |
1362 | if (!record->blocks[block_id].in_use) |
1363 | return false; |
1364 | |
1365 | bkpb = &record->blocks[block_id]; |
1366 | if (rnode) |
1367 | *rnode = bkpb->rnode; |
1368 | if (forknum) |
1369 | *forknum = bkpb->forknum; |
1370 | if (blknum) |
1371 | *blknum = bkpb->blkno; |
1372 | return true; |
1373 | } |
1374 | |
1375 | /* |
1376 | * Returns the data associated with a block reference, or NULL if there is |
1377 | * no data (e.g. because a full-page image was taken instead). The returned |
1378 | * pointer points to a MAXALIGNed buffer. |
1379 | */ |
1380 | char * |
1381 | XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len) |
1382 | { |
1383 | DecodedBkpBlock *bkpb; |
1384 | |
1385 | if (!record->blocks[block_id].in_use) |
1386 | return NULL; |
1387 | |
1388 | bkpb = &record->blocks[block_id]; |
1389 | |
1390 | if (!bkpb->has_data) |
1391 | { |
1392 | if (len) |
1393 | *len = 0; |
1394 | return NULL; |
1395 | } |
1396 | else |
1397 | { |
1398 | if (len) |
1399 | *len = bkpb->data_len; |
1400 | return bkpb->data; |
1401 | } |
1402 | } |
1403 | |
1404 | /* |
1405 | * Restore a full-page image from a backup block attached to an XLOG record. |
1406 | * |
1407 | * Returns the buffer number containing the page. |
1408 | */ |
1409 | bool |
1410 | RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) |
1411 | { |
1412 | DecodedBkpBlock *bkpb; |
1413 | char *ptr; |
1414 | PGAlignedBlock tmp; |
1415 | |
1416 | if (!record->blocks[block_id].in_use) |
1417 | return false; |
1418 | if (!record->blocks[block_id].has_image) |
1419 | return false; |
1420 | |
1421 | bkpb = &record->blocks[block_id]; |
1422 | ptr = bkpb->bkp_image; |
1423 | |
1424 | if (bkpb->bimg_info & BKPIMAGE_IS_COMPRESSED) |
1425 | { |
1426 | /* If a backup block image is compressed, decompress it */ |
1427 | if (pglz_decompress(ptr, bkpb->bimg_len, tmp.data, |
1428 | BLCKSZ - bkpb->hole_length, true) < 0) |
1429 | { |
1430 | report_invalid_record(record, "invalid compressed image at %X/%X, block %d" , |
1431 | (uint32) (record->ReadRecPtr >> 32), |
1432 | (uint32) record->ReadRecPtr, |
1433 | block_id); |
1434 | return false; |
1435 | } |
1436 | ptr = tmp.data; |
1437 | } |
1438 | |
1439 | /* generate page, taking into account hole if necessary */ |
1440 | if (bkpb->hole_length == 0) |
1441 | { |
1442 | memcpy(page, ptr, BLCKSZ); |
1443 | } |
1444 | else |
1445 | { |
1446 | memcpy(page, ptr, bkpb->hole_offset); |
1447 | /* must zero-fill the hole */ |
1448 | MemSet(page + bkpb->hole_offset, 0, bkpb->hole_length); |
1449 | memcpy(page + (bkpb->hole_offset + bkpb->hole_length), |
1450 | ptr + bkpb->hole_offset, |
1451 | BLCKSZ - (bkpb->hole_offset + bkpb->hole_length)); |
1452 | } |
1453 | |
1454 | return true; |
1455 | } |
1456 | |