PostgreSQL Source Code git master
xlogreader.h
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * xlogreader.h
4 * Definitions for the generic XLog reading facility
5 *
6 * Portions Copyright (c) 2013-2025, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/include/access/xlogreader.h
10 *
11 * NOTES
12 * See the definition of the XLogReaderState struct for instructions on
13 * how to use the XLogReader infrastructure.
14 *
15 * The basic idea is to allocate an XLogReaderState via
16 * XLogReaderAllocate(), position the reader to the first record with
17 * XLogBeginRead() or XLogFindNextRecord(), and call XLogReadRecord()
18 * until it returns NULL.
19 *
20 * Callers supply a page_read callback if they want to call
21 * XLogReadRecord or XLogFindNextRecord; it can be passed in as NULL
22 * otherwise. The WALRead function can be used as a helper to write
23 * page_read callbacks, but it is not mandatory; callers that use it
24 * must supply segment_open callbacks. The segment_close callback
25 * must always be supplied.
26 *
27 * After reading a record with XLogReadRecord(), it's decomposed into
28 * the per-block and main data parts, and the parts can be accessed
29 * with the XLogRec* macros and functions. You can also decode a
30 * record that's already constructed in memory, without reading from
31 * disk, by calling the DecodeXLogRecord() function.
32 *-------------------------------------------------------------------------
33 */
34#ifndef XLOGREADER_H
35#define XLOGREADER_H
36
37#ifndef FRONTEND
38#include "access/transam.h"
39#endif
40
41#include "access/xlogrecord.h"
42#include "storage/buf.h"
43
44/* WALOpenSegment represents a WAL segment being read. */
45typedef struct WALOpenSegment
46{
47 int ws_file; /* segment file descriptor */
48 XLogSegNo ws_segno; /* segment number */
49 TimeLineID ws_tli; /* timeline ID of the currently open file */
51
52/* WALSegmentContext carries context information about WAL segments to read */
53typedef struct WALSegmentContext
54{
58
60
61/* Function type definitions for various xlogreader interactions */
63 XLogRecPtr targetPagePtr,
64 int reqLen,
65 XLogRecPtr targetRecPtr,
66 char *readBuf);
68 XLogSegNo nextSegNo,
69 TimeLineID *tli_p);
71
72typedef struct XLogReaderRoutine
73{
74 /*
75 * Data input callback
76 *
77 * This callback shall read at least reqLen valid bytes of the xlog page
78 * starting at targetPagePtr, and store them in readBuf. The callback
79 * shall return the number of bytes read (never more than XLOG_BLCKSZ), or
80 * -1 on failure. The callback shall sleep, if necessary, to wait for the
81 * requested bytes to become available. The callback will not be invoked
82 * again for the same page unless more than the returned number of bytes
83 * are needed.
84 *
85 * targetRecPtr is the position of the WAL record we're reading. Usually
86 * it is equal to targetPagePtr + reqLen, but sometimes xlogreader needs
87 * to read and verify the page or segment header, before it reads the
88 * actual WAL record it's interested in. In that case, targetRecPtr can
89 * be used to determine which timeline to read the page from.
90 *
91 * The callback shall set ->seg.ws_tli to the TLI of the file the page was
92 * read from.
93 */
95
96 /*
97 * Callback to open the specified WAL segment for reading. ->seg.ws_file
98 * shall be set to the file descriptor of the opened segment. In case of
99 * failure, an error shall be raised by the callback and it shall not
100 * return.
101 *
102 * "nextSegNo" is the number of the segment to be opened.
103 *
104 * "tli_p" is an input/output argument. WALRead() uses it to pass the
105 * timeline in which the new segment should be found, but the callback can
106 * use it to return the TLI that it actually opened.
107 */
109
110 /*
111 * WAL segment close callback. ->seg.ws_file shall be set to a negative
112 * number.
113 */
116
/*
 * XL_ROUTINE(...) expands to the address of an unnamed XLogReaderRoutine
 * compound literal whose initializer list is the macro's arguments.
 *
 * NOTE(review): the expansion is not wrapped in parentheses, so it should be
 * used as a complete expression (such as a function-call argument).
 */
117#define XL_ROUTINE(...) &(XLogReaderRoutine){__VA_ARGS__}
118
119typedef struct
120{
121 /* Is this block ref in use? */
122 bool in_use;
123
124 /* Identify the block this refers to */
128
129 /* Prefetching workspace. */
131
132 /* copy of the fork_flags field from the XLogRecordBlockHeader */
134
135 /* Information on full-page image, if any */
136 bool has_image; /* has image, even for consistency checking */
137 bool apply_image; /* has image that should be restored */
143
144 /* Buffer holding the rmgr-specific data associated with this block */
146 char *data;
149
150/*
151 * The decoded contents of a record. This occupies a contiguous region of
152 * memory, with main_data and blocks[n].data pointing to memory after the
153 * members declared here.
154 */
155typedef struct DecodedXLogRecord
156{
157 /* Private member used for resource management. */
158 size_t size; /* total size of decoded record */
159 bool oversized; /* outside the regular decode buffer? */
160 struct DecodedXLogRecord *next; /* decoded record queue link */
161
162 /* Public members. */
163 XLogRecPtr lsn; /* location */
164 XLogRecPtr next_lsn; /* location of next record */
165 XLogRecord header; /* header */
167 TransactionId toplevel_xid; /* XID of top-level transaction */
168 char *main_data; /* record's main data portion */
169 uint32 main_data_len; /* main data portion's length */
170 int max_block_id; /* highest block_id in use (-1 if none) */
173
175{
176 /*
177 * Operational callbacks
178 */
180
181 /* ----------------------------------------
182 * Public parameters
183 * ----------------------------------------
184 */
185
186 /*
187 * System identifier of the xlog files we're about to read. Set to zero
188 * (the default value) if unknown or unimportant.
189 */
191
192 /*
193 * Opaque data for callbacks to use. Not used by XLogReader.
194 */
196
197 /*
198 * Start and end point of last record read. EndRecPtr is also used as the
199 * position to read next. Calling XLogBeginRead() sets EndRecPtr to the
200 * starting position and ReadRecPtr to invalid.
201 *
202 * Start and end point of last record returned by XLogReadRecord(). These
203 * are also available as record->lsn and record->next_lsn.
204 */
205 XLogRecPtr ReadRecPtr; /* start of last record read */
206 XLogRecPtr EndRecPtr; /* end+1 of last record read */
207
208 /*
209 * Set at the end of recovery: the start point of a partial record at the
210 * end of WAL (InvalidXLogRecPtr if there wasn't one), and the start
211 * location of its first contrecord that went missing.
212 */
215 /* Set when XLP_FIRST_IS_OVERWRITE_CONTRECORD is found */
217
218
219 /* ----------------------------------------
220 * Decoded representation of current record
221 *
222 * Use XLogRecGet* functions to investigate the record; these fields
223 * should not be accessed directly.
224 * ----------------------------------------
225 * Start and end point of the last record read and decoded by
226 * XLogReadRecord(). NextRecPtr is also used as the position to decode
227 * next. Calling XLogBeginRead() sets NextRecPtr and EndRecPtr to the
228 * requested starting position.
229 */
230 XLogRecPtr DecodeRecPtr; /* start of last record decoded */
231 XLogRecPtr NextRecPtr; /* end+1 of last record decoded */
232 XLogRecPtr PrevRecPtr; /* start of previous record decoded */
233
234 /* Last record returned by XLogReadRecord(). */
236
237 /* ----------------------------------------
238 * private/internal state
239 * ----------------------------------------
240 */
241
242 /*
243 * Buffer for decoded records. This is a circular buffer, though
244 * individual records can't be split in the middle, so some space is often
245 * wasted at the end. Oversized records that don't fit in this space are
246 * allocated separately.
247 */
250 bool free_decode_buffer; /* need to free? */
251 char *decode_buffer_head; /* data is read from the head */
252 char *decode_buffer_tail; /* new data is written at the tail */
253
254 /*
255 * Queue of records that have been decoded. This is a linked list that
256 * usually consists of consecutive records in decode_buffer, but may also
257 * contain oversized records allocated with palloc().
258 */
259 DecodedXLogRecord *decode_queue_head; /* oldest decoded record */
260 DecodedXLogRecord *decode_queue_tail; /* newest decoded record */
261
262 /*
263 * Buffer for currently read page (XLOG_BLCKSZ bytes, valid up to at least
264 * readLen bytes)
265 */
266 char *readBuf;
268
269 /* last read XLOG position for data currently in readBuf */
273
274 /*
275 * beginning of prior page read, and its TLI. Doesn't necessarily
276 * correspond to what's in readBuf; used for timeline sanity checks.
277 */
280
281 /* beginning of the WAL record being read. */
283 /* timeline to read it from, 0 if a lookup is required */
285
286 /*
287 * Safe point to read to in currTLI if current TLI is historical
288 * (tliSwitchPoint) or InvalidXLogRecPtr if on current timeline.
289 *
290 * Actually set to the start of the segment containing the timeline switch
291 * that ends currTLI's validity, not the LSN of the switch itself, since
292 * we can't assume the old segment will be present.
293 */
295
296 /*
297 * If currTLI is not the most recent known timeline, the next timeline to
298 * read from when currTLIValidUntil is reached.
299 */
301
302 /*
303 * Buffer for current ReadRecord result (expandable), used when a record
304 * crosses a page boundary.
305 */
308
309 /* Buffer to hold error message */
312
313 /*
314 * Flag to indicate to XLogPageReadCB that it should not block waiting for
315 * data.
316 */
318};
319
320/*
321 * Check if XLogNextRecord() has any more queued records or an error to return.
322 */
323static inline bool
325{
326 return (state->decode_queue_head != NULL) || state->errormsg_deferred;
327}
328
329/* Get a new XLogReader */
331 const char *waldir,
332 XLogReaderRoutine *routine,
333 void *private_data);
334
335/* Free an XLogReader */
337
338/* Optionally provide a circular decoding buffer to allow readahead. */
340 void *buffer,
341 size_t size);
342
343/* Position the XLogReader to given record */
344extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
346
347/* Return values from XLogPageReadCB. */
349{
350 XLREAD_SUCCESS = 0, /* record is successfully read */
351 XLREAD_FAIL = -1, /* failed during reading a record */
352 XLREAD_WOULDBLOCK = -2, /* nonblocking mode only, no data */
354
355/* Read the next XLog record. Returns NULL on end-of-WAL or failure */
357 char **errormsg);
358
359/* Consume the next record or error. */
361 char **errormsg);
362
363/* Release the previously returned record, if necessary. */
365
366/* Try to read ahead, if there is data and space. */
368 bool nonblocking);
369
370/* Validate a page */
372 XLogRecPtr recptr, char *phdr);
373
374/* Forget error produced by XLogReaderValidatePageHeader(). */
376
377/*
378 * Error information from WALRead that both backend and frontend caller can
379 * process. Currently only errors from pg_pread can be reported.
380 */
381typedef struct WALReadError
382{
383 int wre_errno; /* errno set by the last pg_pread() */
384 int wre_off; /* Offset we tried to read from. */
385 int wre_req; /* Bytes requested to be read. */
386 int wre_read; /* Bytes read by the last read(). */
387 WALOpenSegment wre_seg; /* Segment we tried to read from. */
389
390extern bool WALRead(XLogReaderState *state,
391 char *buf, XLogRecPtr startptr, Size count,
392 TimeLineID tli, WALReadError *errinfo);
393
394/* Functions for decoding an XLogRecord */
395
396extern size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len);
398 DecodedXLogRecord *decoded,
399 XLogRecord *record,
400 XLogRecPtr lsn,
401 char **errormsg);
402
403/*
404 * Macros that provide access to parts of the record most recently returned by
405 * XLogReadRecord() or XLogNextRecord().
 *
 * "decoder" is a pointer to the reader state; every macro goes through
 * (decoder)->record and none of them checks that pointer for NULL.
 *
 * XLogRecHasBlockRef() compares block_id against max_block_id before
 * looking at blocks[block_id].in_use.  The other per-block macros
 * (XLogRecGetBlock, XLogRecHasBlockImage, XLogRecBlockImageApply and
 * XLogRecHasBlockData) index blocks[] directly without that range check.
 *
 * These are plain macros: XLogRecHasBlockRef() evaluates both of its
 * arguments twice, so do not pass expressions with side effects.
406 */
407#define XLogRecGetTotalLen(decoder) ((decoder)->record->header.xl_tot_len)
408#define XLogRecGetPrev(decoder) ((decoder)->record->header.xl_prev)
409#define XLogRecGetInfo(decoder) ((decoder)->record->header.xl_info)
410#define XLogRecGetRmid(decoder) ((decoder)->record->header.xl_rmid)
411#define XLogRecGetXid(decoder) ((decoder)->record->header.xl_xid)
412#define XLogRecGetOrigin(decoder) ((decoder)->record->record_origin)
413#define XLogRecGetTopXid(decoder) ((decoder)->record->toplevel_xid)
414#define XLogRecGetData(decoder) ((decoder)->record->main_data)
415#define XLogRecGetDataLen(decoder) ((decoder)->record->main_data_len)
416#define XLogRecHasAnyBlockRefs(decoder) ((decoder)->record->max_block_id >= 0)
417#define XLogRecMaxBlockId(decoder) ((decoder)->record->max_block_id)
418#define XLogRecGetBlock(decoder, i) (&(decoder)->record->blocks[(i)])
419#define XLogRecHasBlockRef(decoder, block_id) \
420 (((decoder)->record->max_block_id >= (block_id)) && \
421 ((decoder)->record->blocks[block_id].in_use))
422#define XLogRecHasBlockImage(decoder, block_id) \
423 ((decoder)->record->blocks[block_id].has_image)
424#define XLogRecBlockImageApply(decoder, block_id) \
425 ((decoder)->record->blocks[block_id].apply_image)
426#define XLogRecHasBlockData(decoder, block_id) \
427 ((decoder)->record->blocks[block_id].has_data)
428
429#ifndef FRONTEND
431#endif
432
433extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page);
434extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
435extern void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
436 RelFileLocator *rlocator, ForkNumber *forknum,
437 BlockNumber *blknum);
438extern bool XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id,
439 RelFileLocator *rlocator, ForkNumber *forknum,
440 BlockNumber *blknum,
441 Buffer *prefetch_buffer);
442
443#endif /* XLOGREADER_H */
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
uint8_t uint8
Definition: c.h:541
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:475
uint64_t uint64
Definition: c.h:544
uint16_t uint16
Definition: c.h:542
uint32_t uint32
Definition: c.h:543
uint32 TransactionId
Definition: c.h:662
size_t Size
Definition: c.h:615
#define MAXPGPATH
const void size_t len
static char * buf
Definition: pg_test_fsync.c:72
ForkNumber
Definition: relpath.h:56
uint16 hole_length
Definition: xlogreader.h:140
char * bkp_image
Definition: xlogreader.h:138
Buffer prefetch_buffer
Definition: xlogreader.h:130
RelFileLocator rlocator
Definition: xlogreader.h:125
BlockNumber blkno
Definition: xlogreader.h:127
ForkNumber forknum
Definition: xlogreader.h:126
uint16 hole_offset
Definition: xlogreader.h:139
XLogRecord header
Definition: xlogreader.h:165
XLogRecPtr next_lsn
Definition: xlogreader.h:164
struct DecodedXLogRecord * next
Definition: xlogreader.h:160
TransactionId toplevel_xid
Definition: xlogreader.h:167
uint32 main_data_len
Definition: xlogreader.h:169
RepOriginId record_origin
Definition: xlogreader.h:166
DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER]
Definition: xlogreader.h:171
XLogRecPtr lsn
Definition: xlogreader.h:163
XLogSegNo ws_segno
Definition: xlogreader.h:48
TimeLineID ws_tli
Definition: xlogreader.h:49
WALOpenSegment wre_seg
Definition: xlogreader.h:387
char ws_dir[MAXPGPATH]
Definition: xlogreader.h:55
WALSegmentCloseCB segment_close
Definition: xlogreader.h:114
WALSegmentOpenCB segment_open
Definition: xlogreader.h:108
XLogPageReadCB page_read
Definition: xlogreader.h:94
XLogRecPtr missingContrecPtr
Definition: xlogreader.h:214
DecodedXLogRecord * record
Definition: xlogreader.h:235
char * errormsg_buf
Definition: xlogreader.h:310
WALSegmentContext segcxt
Definition: xlogreader.h:270
XLogRecPtr PrevRecPtr
Definition: xlogreader.h:232
size_t decode_buffer_size
Definition: xlogreader.h:249
DecodedXLogRecord * decode_queue_head
Definition: xlogreader.h:259
XLogRecPtr EndRecPtr
Definition: xlogreader.h:206
uint32 readRecordBufSize
Definition: xlogreader.h:307
uint64 system_identifier
Definition: xlogreader.h:190
bool free_decode_buffer
Definition: xlogreader.h:250
char * decode_buffer
Definition: xlogreader.h:248
TimeLineID nextTLI
Definition: xlogreader.h:300
XLogRecPtr currTLIValidUntil
Definition: xlogreader.h:294
char * readRecordBuf
Definition: xlogreader.h:306
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:205
XLogRecPtr abortedRecPtr
Definition: xlogreader.h:213
DecodedXLogRecord * decode_queue_tail
Definition: xlogreader.h:260
XLogReaderRoutine routine
Definition: xlogreader.h:179
char * decode_buffer_head
Definition: xlogreader.h:251
bool errormsg_deferred
Definition: xlogreader.h:311
TimeLineID latestPageTLI
Definition: xlogreader.h:279
TimeLineID currTLI
Definition: xlogreader.h:284
XLogRecPtr overwrittenRecPtr
Definition: xlogreader.h:216
XLogRecPtr DecodeRecPtr
Definition: xlogreader.h:230
XLogRecPtr currRecPtr
Definition: xlogreader.h:282
XLogRecPtr latestPagePtr
Definition: xlogreader.h:278
char * decode_buffer_tail
Definition: xlogreader.h:252
WALOpenSegment seg
Definition: xlogreader.h:271
void * private_data
Definition: xlogreader.h:195
XLogRecPtr NextRecPtr
Definition: xlogreader.h:231
Definition: regguts.h:323
int wal_segment_size
Definition: xlog.c:145
uint16 RepOriginId
Definition: xlogdefs.h:69
uint64 XLogRecPtr
Definition: xlogdefs.h:21
uint32 TimeLineID
Definition: xlogdefs.h:63
uint64 XLogSegNo
Definition: xlogdefs.h:52
bool XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum, Buffer *prefetch_buffer)
Definition: xlogreader.c:2017
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:107
struct XLogRecord * XLogReadRecord(XLogReaderState *state, char **errormsg)
Definition: xlogreader.c:390
void XLogReaderSetDecodeBuffer(XLogReaderState *state, void *buffer, size_t size)
Definition: xlogreader.c:91
DecodedXLogRecord * XLogReadAhead(XLogReaderState *state, bool nonblocking)
Definition: xlogreader.c:977
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:2045
DecodedXLogRecord * XLogNextRecord(XLogReaderState *state, char **errormsg)
Definition: xlogreader.c:326
static bool XLogReaderHasQueuedRecordOrError(XLogReaderState *state)
Definition: xlogreader.h:324
struct DecodedXLogRecord DecodedXLogRecord
bool WALRead(XLogReaderState *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli, WALReadError *errinfo)
Definition: xlogreader.c:1514
struct WALReadError WALReadError
void(* WALSegmentCloseCB)(XLogReaderState *xlogreader)
Definition: xlogreader.h:70
void XLogReaderResetError(XLogReaderState *state)
Definition: xlogreader.c:1376
bool XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, char *phdr)
Definition: xlogreader.c:1235
FullTransactionId XLogRecGetFullXid(XLogReaderState *record)
Definition: xlogreader.c:2187
void XLogReaderFree(XLogReaderState *state)
Definition: xlogreader.c:162
void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1991
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
Definition: xlogreader.c:1682
struct WALOpenSegment WALOpenSegment
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
Definition: xlogreader.c:1649
XLogPageReadResult
Definition: xlogreader.h:349
@ XLREAD_WOULDBLOCK
Definition: xlogreader.h:352
@ XLREAD_SUCCESS
Definition: xlogreader.h:350
@ XLREAD_FAIL
Definition: xlogreader.h:351
XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr)
Definition: xlogreader.c:1394
void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr)
Definition: xlogreader.c:232
struct WALSegmentContext WALSegmentContext
bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
Definition: xlogreader.c:2076
void(* WALSegmentOpenCB)(XLogReaderState *xlogreader, XLogSegNo nextSegNo, TimeLineID *tli_p)
Definition: xlogreader.h:67
int(* XLogPageReadCB)(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *readBuf)
Definition: xlogreader.h:62
struct XLogReaderRoutine XLogReaderRoutine
XLogRecPtr XLogReleasePreviousRecord(XLogReaderState *state)
Definition: xlogreader.c:250
static XLogReaderState * xlogreader
Definition: xlogrecovery.c:191