PostgreSQL Source Code git master
bufpage.h
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * bufpage.h
4 * Standard POSTGRES buffer page definitions.
5 *
6 *
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * src/include/storage/bufpage.h
11 *
12 *-------------------------------------------------------------------------
13 */
14#ifndef BUFPAGE_H
15#define BUFPAGE_H
16
17#include "access/xlogdefs.h"
18#include "storage/block.h"
19#include "storage/off.h"
20
21/* GUC variable */
23
24/*
25 * A postgres disk page is an abstraction layered on top of a postgres
26 * disk block (which is simply a unit of i/o, see block.h).
27 *
28 * specifically, while a disk block can be unformatted, a postgres
29 * disk page is always a slotted page of the form:
30 *
31 * +----------------+---------------------------------+
32 * | PageHeaderData | linp1 linp2 linp3 ...           |
33 * +-----------+----+---------------------------------+
34 * | ... linpN |                                      |
35 * +-----------+--------------------------------------+
36 * |           ^ pd_lower                             |
37 * |                                                  |
38 * |             v pd_upper                           |
39 * +-------------+------------------------------------+
40 * |             | tupleN ...                         |
41 * +-------------+------------------+-----------------+
42 * |       ... tuple3 tuple2 tuple1 | "special space" |
43 * +--------------------------------+-----------------+
44 *                                  ^ pd_special
45 *
46 * a page is full when nothing can be added between pd_lower and
47 * pd_upper.
48 *
49 * all blocks written out by an access method must be disk pages.
50 *
51 * EXCEPTIONS:
52 *
53 * obviously, a page is not formatted before it is initialized by
54 * a call to PageInit.
55 *
56 * NOTES:
57 *
58 * linp1..N form an ItemId (line pointer) array. ItemPointers point
59 * to a physical block number and a logical offset (line pointer
60 * number) within that block/page. Note that OffsetNumbers
61 * conventionally start at 1, not 0.
62 *
63 * tuple1..N are added "backwards" on the page. Since an ItemPointer
64 * offset is used to access an ItemId entry rather than an actual
65 * byte-offset position, tuples can be physically shuffled on a page
66 * whenever the need arises. This indirection also keeps crash recovery
67 * relatively simple, because the low-level details of page space
68 * management can be controlled by standard buffer page code during
69 * logging, and during recovery.
70 *
71 * AM-generic per-page information is kept in PageHeaderData.
72 *
73 * AM-specific per-page data (if any) is kept in the area marked "special
74 * space"; each AM has an "opaque" structure defined somewhere that is
75 * stored as the page trailer. An access method should always
76 * initialize its pages with PageInit and then set its own opaque
77 * fields.
78 */
79
/*
 * A page is handled as raw bytes: PageData is the byte type, and Page is a
 * pointer to page bytes.  The accessors in this header cast such a pointer
 * to the page header type to reach the header fields.
 */
typedef char PageData;
typedef PageData *Page;
82
83
84/*
85 * location (byte offset) within a page.
86 *
87 * note that this is actually limited to 2^15 because we have limited
88 * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h).
89 */
91
92
93/*
94 * For historical reasons, the 64-bit LSN value is stored as two 32-bit
95 * values.
96 */
97typedef struct
98{
99 uint32 xlogid; /* high bits */
100 uint32 xrecoff; /* low bits */
102
103static inline XLogRecPtr
105{
106 return (uint64) val.xlogid << 32 | val.xrecoff;
107}
108
/*
 * PageXLogRecPtrSet
 *		Split a 64-bit LSN into the two 32-bit halves of "ptr".
 *
 * Note: "ptr" and "lsn" are each expanded twice, so do not pass arguments
 * with side effects.
 */
#define PageXLogRecPtrSet(ptr, lsn) \
	((ptr).xlogid = (uint32) ((lsn) >> 32), \
	 (ptr).xrecoff = (uint32) (lsn))
111
112/*
113 * disk page organization
114 *
115 * space management information generic to any page
116 *
117 * pd_lsn - identifies xlog record for last change to this page.
118 * pd_checksum - page checksum, if set.
119 * pd_flags - flag bits.
120 * pd_lower - offset to start of free space.
121 * pd_upper - offset to end of free space.
122 * pd_special - offset to start of special space.
123 * pd_pagesize_version - size in bytes and page layout version number.
124 * pd_prune_xid - oldest XID among potentially prunable tuples on page.
125 *
126 * The LSN is used by the buffer manager to enforce the basic rule of WAL:
127 * "thou shalt write xlog before data". A dirty buffer cannot be dumped
128 * to disk until xlog has been flushed at least as far as the page's LSN.
129 *
130 * pd_checksum stores the page checksum, if it has been set for this page;
131 * zero is a valid value for a checksum. If a checksum is not in use then
132 * we leave the field unset. This will typically mean the field is zero
133 * though non-zero values may also be present if databases have been
134 * pg_upgraded from releases prior to 9.3, when the same byte offset was
135 * used to store the current timelineid when the page was last updated.
136 * Note that there is no indication on a page as to whether the checksum
137 * is valid or not, a deliberate design choice which avoids the problem
138 * of relying on the page contents to decide whether to verify it. Hence
139 * there are no flag bits relating to checksums.
140 *
141 * pd_prune_xid is a hint field that helps determine whether pruning will be
142 * useful. It is currently unused in index pages.
143 *
144 * The page version number and page size are packed together into a single
145 * uint16 field. This is for historical reasons: before PostgreSQL 7.3,
146 * there was no concept of a page version number, and doing it this way
147 * lets us pretend that pre-7.3 databases have page version number zero.
148 * We constrain page sizes to be multiples of 256, leaving the low eight
149 * bits available for a version number.
150 *
151 * Minimum possible page size is perhaps 64B to fit page header, opaque space
152 * and a minimal tuple; of course, in reality you want it much bigger, so
153 * the constraint on pagesize mod 256 is not an important restriction.
154 * On the high end, we can only support pages up to 32KB because lp_off/lp_len
155 * are 15 bits.
156 */
157
158typedef struct PageHeaderData
159{
160 /* XXX LSN is member of *any* block, not only page-organized ones */
161 PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
162 * record for last change to this page */
163 uint16 pd_checksum; /* checksum */
164 uint16 pd_flags; /* flag bits, see below */
165 LocationIndex pd_lower; /* offset to start of free space */
166 LocationIndex pd_upper; /* offset to end of free space */
167 LocationIndex pd_special; /* offset to start of special space */
169 TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
170 ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
172
174
175/*
176 * pd_flags contains the following flag bits. Undefined bits are initialized
177 * to zero and may be used in the future.
178 *
179 * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before
180 * pd_lower. This should be considered a hint rather than the truth, since
181 * changes to it are not WAL-logged.
182 *
183 * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
184 * page for its new tuple version; this suggests that a prune is needed.
185 * Again, this is just a hint.
186 */
#define PD_HAS_FREE_LINES	0x0001	/* are there any unused line pointers? */
#define PD_PAGE_FULL		0x0002	/* not enough free space for new tuple? */
#define PD_ALL_VISIBLE		0x0004	/* all tuples on page are visible to
									 * everyone */

#define PD_VALID_FLAG_BITS	0x0007	/* OR of all valid pd_flags bits */
193
194/*
195 * Page layout version number 0 is for pre-7.3 Postgres releases.
196 * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout.
197 * Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
198 * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
199 * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
200 * added the pd_flags field (by stealing some bits from pd_tli),
201 * as well as adding the pd_prune_xid field (which enlarges the header).
202 *
203 * As of Release 9.3, the checksum version must also be considered when
204 * handling pages.
205 */
#define PG_PAGE_LAYOUT_VERSION		4	/* current page layout version */
#define PG_DATA_CHECKSUM_VERSION	1	/* current checksum version */
208
209/* ----------------------------------------------------------------
210 * page support functions
211 * ----------------------------------------------------------------
212 */
213
/*
 * Size of the fixed part of the page header.  The line pointer array does
 * not count as part of the header, so this is simply the offset at which
 * pd_linp begins.
 */
#define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp))
218
219/*
220 * PageIsEmpty
221 * returns true iff no itemid has been allocated on the page
222 */
223static inline bool
224PageIsEmpty(const PageData *page)
225{
226 return ((const PageHeaderData *) page)->pd_lower <= SizeOfPageHeaderData;
227}
228
229/*
230 * PageIsNew
231 * returns true iff page has not been initialized (by PageInit)
232 */
233static inline bool
234PageIsNew(const PageData *page)
235{
236 return ((const PageHeaderData *) page)->pd_upper == 0;
237}
238
239/*
240 * PageGetItemId
241 * Returns an item identifier of a page.
242 */
243static inline ItemId
244PageGetItemId(Page page, OffsetNumber offsetNumber)
245{
246 return &((PageHeader) page)->pd_linp[offsetNumber - 1];
247}
248
249/*
250 * PageGetContents
251 * To be used in cases where the page does not contain line pointers.
252 *
253 * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
254 * Now it is. Beware of old code that might think the offset to the contents
255 * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
256 */
257static inline char *
259{
260 return (char *) page + MAXALIGN(SizeOfPageHeaderData);
261}
262
263/* ----------------
264 * functions to access page size info
265 * ----------------
266 */
267
268/*
269 * PageGetPageSize
270 * Returns the page size of a page.
271 *
272 * this can only be called on a formatted page (unlike
273 * BufferGetPageSize, which can be called on an unformatted page).
274 * however, it can be called on a page that is not stored in a buffer.
275 */
276static inline Size
277PageGetPageSize(const PageData *page)
278{
279 return (Size) (((const PageHeaderData *) page)->pd_pagesize_version & (uint16) 0xFF00);
280}
281
282/*
283 * PageGetPageLayoutVersion
284 * Returns the page layout version of a page.
285 */
286static inline uint8
288{
289 return (((const PageHeaderData *) page)->pd_pagesize_version & 0x00FF);
290}
291
292/*
293 * PageSetPageSizeAndVersion
294 * Sets the page size and page layout version number of a page.
295 *
296 * We could support setting these two values separately, but there's
297 * no real need for it at the moment.
298 */
299static inline void
300PageSetPageSizeAndVersion(Page page, Size size, uint8 version)
301{
302 Assert((size & 0xFF00) == size);
303 Assert((version & 0x00FF) == version);
304
305 ((PageHeader) page)->pd_pagesize_version = size | version;
306}
307
308/* ----------------
309 * page special data functions
310 * ----------------
311 */
312/*
313 * PageGetSpecialSize
314 * Returns size of special space on a page.
315 */
316static inline uint16
317PageGetSpecialSize(const PageData *page)
318{
319 return (PageGetPageSize(page) - ((const PageHeaderData *) page)->pd_special);
320}
321
322/*
323 * Using assertions, validate that the page special pointer is OK.
324 *
325 * This is intended to catch use of the pointer before page initialization.
326 */
327static inline void
329{
330 Assert(page);
331 Assert(((const PageHeaderData *) page)->pd_special <= BLCKSZ);
332 Assert(((const PageHeaderData *) page)->pd_special >= SizeOfPageHeaderData);
333}
334
/*
 * PageGetSpecialPointer
 *		Returns pointer to special space on a page.
 *
 * The special pointer is validated by assertion first.  Note that "page" is
 * expanded more than once, so do not pass an argument with side effects.
 */
#define PageGetSpecialPointer(page) \
( \
	PageValidateSpecialPointer(page), \
	((page) + ((PageHeader) (page))->pd_special) \
)
344
345/*
346 * PageGetItem
347 * Retrieves an item on the given page.
348 *
349 * Note:
350 * This does not change the status of any of the resources passed.
351 * The semantics may change in the future.
352 */
353static inline void *
354PageGetItem(const PageData *page, const ItemIdData *itemId)
355{
356 Assert(page);
357 Assert(ItemIdHasStorage(itemId));
358
359 return (void *) (((const char *) page) + ItemIdGetOffset(itemId));
360}
361
362/*
363 * PageGetMaxOffsetNumber
364 * Returns the maximum offset number used by the given page.
365 * Since offset numbers are 1-based, this is also the number
366 * of items on the page.
367 *
368 * NOTE: if the page is not initialized (pd_lower == 0), we must
369 * return zero to ensure sane behavior.
370 */
371static inline OffsetNumber
373{
374 const PageHeaderData *pageheader = (const PageHeaderData *) page;
375
376 if (pageheader->pd_lower <= SizeOfPageHeaderData)
377 return 0;
378 else
379 return (pageheader->pd_lower - SizeOfPageHeaderData) / sizeof(ItemIdData);
380}
381
382/*
383 * Additional functions for access to page headers.
384 */
385static inline XLogRecPtr
386PageGetLSN(const PageData *page)
387{
388 return PageXLogRecPtrGet(((const PageHeaderData *) page)->pd_lsn);
389}
390static inline void
391PageSetLSN(Page page, XLogRecPtr lsn)
392{
393 PageXLogRecPtrSet(((PageHeader) page)->pd_lsn, lsn);
394}
395
396static inline bool
398{
399 return ((const PageHeaderData *) page)->pd_flags & PD_HAS_FREE_LINES;
400}
401static inline void
403{
404 ((PageHeader) page)->pd_flags |= PD_HAS_FREE_LINES;
405}
406static inline void
408{
409 ((PageHeader) page)->pd_flags &= ~PD_HAS_FREE_LINES;
410}
411
412static inline bool
413PageIsFull(const PageData *page)
414{
415 return ((const PageHeaderData *) page)->pd_flags & PD_PAGE_FULL;
416}
417static inline void
418PageSetFull(Page page)
419{
420 ((PageHeader) page)->pd_flags |= PD_PAGE_FULL;
421}
422static inline void
424{
425 ((PageHeader) page)->pd_flags &= ~PD_PAGE_FULL;
426}
427
428static inline bool
429PageIsAllVisible(const PageData *page)
430{
431 return ((const PageHeaderData *) page)->pd_flags & PD_ALL_VISIBLE;
432}
433static inline void
435{
436 ((PageHeader) page)->pd_flags |= PD_ALL_VISIBLE;
437}
438static inline void
440{
441 ((PageHeader) page)->pd_flags &= ~PD_ALL_VISIBLE;
442}
443
444/*
445 * These two require "access/transam.h", so left as macros.
446 */
/*
 * PageSetPrunable
 *		Record "xid" in pd_prune_xid if no valid value is stored there yet,
 *		or if "xid" precedes the stored value.
 *
 * "page" and "xid" are expanded several times, so do not pass arguments
 * with side effects.
 */
#define PageSetPrunable(page, xid) \
do { \
	Assert(TransactionIdIsNormal(xid)); \
	if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
		TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
		((PageHeader) (page))->pd_prune_xid = (xid); \
} while (0)
/*
 * PageClearPrunable
 *		Reset pd_prune_xid to InvalidTransactionId.
 */
#define PageClearPrunable(page) \
	(((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
456
457
458/* ----------------------------------------------------------------
459 * extern declarations
460 * ----------------------------------------------------------------
461 */
462
/* flags for PageAddItemExtended() */
#define PAI_OVERWRITE			(1 << 0)
#define PAI_IS_HEAP				(1 << 1)
466
/* flags for PageIsVerified() */
#define PIV_LOG_WARNING				(1 << 0)
#define PIV_LOG_LOG					(1 << 1)
#define PIV_IGNORE_CHECKSUM_FAILURE (1 << 2)
/*
 * PageAddItem
 *		Convenience wrapper around PageAddItemExtended() that turns the two
 *		boolean arguments into the corresponding PAI_* flag bits.
 */
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
	PageAddItemExtended(page, item, size, offsetNumber, \
						((overwrite) ? PAI_OVERWRITE : 0) | \
						((is_heap) ? PAI_IS_HEAP : 0))
476
477/*
478 * Check that BLCKSZ is a multiple of sizeof(size_t). In PageIsVerified(), it
479 * is much faster to check if a page is full of zeroes using the native word
480 * size. Note that this assertion is kept within a header to make sure that
481 * StaticAssertDecl() works across various combinations of platforms and
482 * compilers.
483 */
484StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
485 "BLCKSZ has to be a multiple of sizeof(size_t)");
486
487extern void PageInit(Page page, Size pageSize, Size specialSize);
488extern bool PageIsVerified(PageData *page, BlockNumber blkno, int flags,
489 bool *checksum_failure_p);
490extern OffsetNumber PageAddItemExtended(Page page, const void *item, Size size,
491 OffsetNumber offsetNumber, int flags);
492extern Page PageGetTempPage(const PageData *page);
493extern Page PageGetTempPageCopy(const PageData *page);
494extern Page PageGetTempPageCopySpecial(const PageData *page);
495extern void PageRestoreTempPage(Page tempPage, Page oldPage);
496extern void PageRepairFragmentation(Page page);
497extern void PageTruncateLinePointerArray(Page page);
498extern Size PageGetFreeSpace(const PageData *page);
499extern Size PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups);
500extern Size PageGetExactFreeSpace(const PageData *page);
501extern Size PageGetHeapFreeSpace(const PageData *page);
502extern void PageIndexTupleDelete(Page page, OffsetNumber offnum);
503extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
504extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum);
505extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
506 const void *newtup, Size newsize);
507extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
508extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
509
510#endif /* BUFPAGE_H */
uint32 BlockNumber
Definition: block.h:31
struct PageHeaderData PageHeaderData
Size PageGetFreeSpace(const PageData *page)
Definition: bufpage.c:906
#define PD_PAGE_FULL
Definition: bufpage.h:188
static bool PageIsEmpty(const PageData *page)
Definition: bufpage.h:223
PageHeaderData * PageHeader
Definition: bufpage.h:173
PGDLLIMPORT bool ignore_checksum_failure
Definition: bufpage.c:27
void PageRestoreTempPage(Page tempPage, Page oldPage)
Definition: bufpage.c:423
static void PageSetHasFreeLinePointers(Page page)
Definition: bufpage.h:401
Size PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups)
Definition: bufpage.c:933
Size PageGetHeapFreeSpace(const PageData *page)
Definition: bufpage.c:990
static bool PageIsAllVisible(const PageData *page)
Definition: bufpage.h:428
static uint16 PageGetSpecialSize(const PageData *page)
Definition: bufpage.h:316
char PageData
Definition: bufpage.h:80
static void PageClearAllVisible(Page page)
Definition: bufpage.h:438
Page PageGetTempPage(const PageData *page)
Definition: bufpage.c:364
static Size PageGetPageSize(const PageData *page)
Definition: bufpage.h:276
Page PageGetTempPageCopy(const PageData *page)
Definition: bufpage.c:381
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1160
static void * PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:353
static void PageClearFull(Page page)
Definition: bufpage.h:422
static XLogRecPtr PageXLogRecPtrGet(PageXLogRecPtr val)
Definition: bufpage.h:104
static bool PageIsNew(const PageData *page)
Definition: bufpage.h:233
#define SizeOfPageHeaderData
Definition: bufpage.h:216
static char * PageGetContents(Page page)
Definition: bufpage.h:257
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, const void *newtup, Size newsize)
Definition: bufpage.c:1404
static void PageSetAllVisible(Page page)
Definition: bufpage.h:433
static uint8 PageGetPageLayoutVersion(const PageData *page)
Definition: bufpage.h:286
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
#define PD_ALL_VISIBLE
Definition: bufpage.h:189
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1541
static void PageSetFull(Page page)
Definition: bufpage.h:417
static bool PageHasFreeLinePointers(const PageData *page)
Definition: bufpage.h:396
OffsetNumber PageAddItemExtended(Page page, const void *item, Size size, OffsetNumber offsetNumber, int flags)
Definition: bufpage.c:193
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:390
void PageIndexTupleDelete(Page page, OffsetNumber offnum)
Definition: bufpage.c:1051
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition: bufpage.c:1509
void PageRepairFragmentation(Page page)
Definition: bufpage.c:698
Size PageGetExactFreeSpace(const PageData *page)
Definition: bufpage.c:957
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:834
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42
static void PageClearHasFreeLinePointers(Page page)
Definition: bufpage.h:406
bool PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_failure_p)
Definition: bufpage.c:94
StaticAssertDecl(BLCKSZ==((BLCKSZ/sizeof(size_t)) *sizeof(size_t)), "BLCKSZ has to be a multiple of sizeof(size_t)")
PageData * Page
Definition: bufpage.h:81
#define PD_HAS_FREE_LINES
Definition: bufpage.h:187
Page PageGetTempPageCopySpecial(const PageData *page)
Definition: bufpage.c:401
uint16 LocationIndex
Definition: bufpage.h:90
static void PageValidateSpecialPointer(const PageData *page)
Definition: bufpage.h:327
void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
Definition: bufpage.c:1294
static XLogRecPtr PageGetLSN(const PageData *page)
Definition: bufpage.h:385
static void PageSetPageSizeAndVersion(Page page, Size size, uint8 version)
Definition: bufpage.h:299
#define PageXLogRecPtrSet(ptr, lsn)
Definition: bufpage.h:109
static bool PageIsFull(const PageData *page)
Definition: bufpage.h:412
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition: bufpage.h:371
#define PGDLLIMPORT
Definition: c.h:1320
#define MAXALIGN(LEN)
Definition: c.h:815
uint8_t uint8
Definition: c.h:541
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:475
uint64_t uint64
Definition: c.h:544
uint16_t uint16
Definition: c.h:542
uint32_t uint32
Definition: c.h:543
uint32 TransactionId
Definition: c.h:662
size_t Size
Definition: c.h:615
Assert(PointerIsAligned(start, uint64))
#define nitems(x)
Definition: indent.h:31
long val
Definition: informix.c:689
#define ItemIdGetOffset(itemId)
Definition: itemid.h:65
struct ItemIdData ItemIdData
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
uint16 OffsetNumber
Definition: off.h:24
PageXLogRecPtr pd_lsn
Definition: bufpage.h:161
LocationIndex pd_special
Definition: bufpage.h:167
LocationIndex pd_upper
Definition: bufpage.h:166
uint16 pd_flags
Definition: bufpage.h:164
uint16 pd_checksum
Definition: bufpage.h:163
LocationIndex pd_lower
Definition: bufpage.h:165
uint16 pd_pagesize_version
Definition: bufpage.h:168
TransactionId pd_prune_xid
Definition: bufpage.h:169
ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]
Definition: bufpage.h:170
uint32 xrecoff
Definition: bufpage.h:100
uint32 xlogid
Definition: bufpage.h:99
uint64 XLogRecPtr
Definition: xlogdefs.h:21