hash_xlog.c (PostgreSQL source code, git master)
1/*-------------------------------------------------------------------------
2 *
3 * hash_xlog.c
4 * WAL replay logic for hash index.
5 *
6 *
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/access/hash/hash_xlog.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "access/bufmask.h"
18#include "access/hash.h"
19#include "access/hash_xlog.h"
20#include "access/xlogutils.h"
21#include "storage/standby.h"
22
23/*
24 * replay a hash index meta page
25 */
26static void
27hash_xlog_init_meta_page(XLogReaderState *record)
28{
29 XLogRecPtr lsn = record->EndRecPtr;
30 Page page;
31 Buffer metabuf;
32 ForkNumber forknum;
33
34 xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);
35
36 /* create the index' metapage */
37 metabuf = XLogInitBufferForRedo(record, 0);
38 Assert(BufferIsValid(metabuf));
39 _hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
40 xlrec->ffactor, true);
41 page = BufferGetPage(metabuf);
42 PageSetLSN(page, lsn);
43 MarkBufferDirty(metabuf);
44
45 /*
46 * Force the on-disk state of init forks to always be in sync with the
47 * state in shared buffers. See XLogReadBufferForRedoExtended. We need
48 * special handling for init forks as create index operations don't log a
49 * full page image of the metapage.
50 */
51 XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
52 if (forknum == INIT_FORKNUM)
53 FlushOneBuffer(metabuf);
54
55 /* all done */
56 UnlockReleaseBuffer(metabuf);
57}
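
The routine above initializes its page from scratch, so it can call XLogInitBufferForRedo() unconditionally; most routines below instead re-apply a change to an existing page and follow a common conditional idiom. A minimal sketch of that idiom, using the real xlogutils.h/bufmgr.h entry points but a hypothetical routine name (example_redo is not part of this file):

    static void
    example_redo(XLogReaderState *record)
    {
        XLogRecPtr  lsn = record->EndRecPtr;
        Buffer      buf;

        /* BLK_NEEDS_REDO means the change must be re-applied by hand */
        if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
        {
            Page        page = BufferGetPage(buf);

            /* ... re-apply the logged change to "page" here ... */

            PageSetLSN(page, lsn);  /* pages at or past this LSN skip redo */
            MarkBufferDirty(buf);
        }
        if (BufferIsValid(buf))
            UnlockReleaseBuffer(buf);
    }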
58
59/*
60 * replay a hash index bitmap page
61 */
62static void
63hash_xlog_init_bitmap_page(XLogReaderState *record)
64{
65 XLogRecPtr lsn = record->EndRecPtr;
66 Buffer bitmapbuf;
67 Buffer metabuf;
68 Page page;
69 HashMetaPage metap;
70 uint32 num_buckets;
71 ForkNumber forknum;
72
73 xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);
74
75 /*
76 * Initialize bitmap page
77 */
78 bitmapbuf = XLogInitBufferForRedo(record, 0);
79 _hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
80 PageSetLSN(BufferGetPage(bitmapbuf), lsn);
81 MarkBufferDirty(bitmapbuf);
82
83 /*
84 * Force the on-disk state of init forks to always be in sync with the
85 * state in shared buffers. See XLogReadBufferForRedoExtended. We need
86 * special handling for init forks as create index operations don't log a
87 * full page image of the metapage.
88 */
89 XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
90 if (forknum == INIT_FORKNUM)
91 FlushOneBuffer(bitmapbuf);
92 UnlockReleaseBuffer(bitmapbuf);
93
94 /* add the new bitmap page to the metapage's list of bitmaps */
95 if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
96 {
97 /*
98 * Note: in normal operation, we'd update the metapage while still
99 * holding lock on the bitmap page. But during replay it's not
100 * necessary to hold that lock, since nobody can see it yet; the
101 * creating transaction hasn't yet committed.
102 */
103 page = BufferGetPage(metabuf);
104 metap = HashPageGetMeta(page);
105
106 num_buckets = metap->hashm_maxbucket + 1;
107 metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
108 metap->hashm_nmaps++;
109
110 PageSetLSN(page, lsn);
111 MarkBufferDirty(metabuf);
112
113 XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
114 if (forknum == INIT_FORKNUM)
115 FlushOneBuffer(metabuf);
116 }
117 if (BufferIsValid(metabuf))
118 UnlockReleaseBuffer(metabuf);
119}
120
121/*
122 * replay a hash index insert without split
123 */
124static void
125hash_xlog_insert(XLogReaderState *record)
126{
127 HashMetaPage metap;
128 XLogRecPtr lsn = record->EndRecPtr;
129 xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record);
130 Buffer buffer;
131 Page page;
132
133 if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
134 {
135 Size datalen;
136 char *datapos = XLogRecGetBlockData(record, 0, &datalen);
137
138 page = BufferGetPage(buffer);
139
140 if (PageAddItem(page, datapos, datalen, xlrec->offnum, false, false) == InvalidOffsetNumber)
141 elog(PANIC, "hash_xlog_insert: failed to add item");
142
143 PageSetLSN(page, lsn);
144 MarkBufferDirty(buffer);
145 }
146 if (BufferIsValid(buffer))
147 UnlockReleaseBuffer(buffer);
148
149 if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
150 {
151 /*
152 * Note: in normal operation, we'd update the metapage while still
153 * holding lock on the page we inserted into. But during replay it's
154 * not necessary to hold that lock, since no other index updates can
155 * be happening concurrently.
156 */
157 page = BufferGetPage(buffer);
158 metap = HashPageGetMeta(page);
159 metap->hashm_ntuples += 1;
160
161 PageSetLSN(page, lsn);
162 MarkBufferDirty(buffer);
163 }
164 if (BufferIsValid(buffer))
165 UnlockReleaseBuffer(buffer);
166}
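
For contrast with the replay side above, this is roughly how the normal-operation side assembles the matching XLOG_HASH_INSERT record in _hash_doinsert(): block 0 carries the page that received the tuple, with the tuple itself as block data, and block 1 carries the metapage. An abridged sketch, not the verbatim hashinsert.c code; buf, metabuf, page, metapage, itup and itup_off stand in for the insert path's local state:

    xl_hash_insert xlrec;
    XLogRecPtr     recptr;

    xlrec.offnum = itup_off;    /* where the tuple was placed */

    XLogBeginInsert();
    XLogRegisterData((char *) &xlrec, SizeOfHashInsert);

    /* block 0: the page that received the tuple, tuple as block data */
    XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
    XLogRegisterBufData(0, (char *) itup, IndexTupleSize(itup));

    /* block 1: the metapage, whose hashm_ntuples is bumped at replay */
    XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);

    recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);
    PageSetLSN(page, recptr);
    PageSetLSN(metapage, recptr);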
167
168/*
169 * replay addition of overflow page for hash index
170 */
171static void
172hash_xlog_add_ovfl_page(XLogReaderState *record)
173{
174 XLogRecPtr lsn = record->EndRecPtr;
175 xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record);
176 Buffer leftbuf;
177 Buffer ovflbuf;
178 Buffer metabuf;
179 BlockNumber leftblk;
180 BlockNumber rightblk;
181 BlockNumber newmapblk = InvalidBlockNumber;
182 Page ovflpage;
183 HashPageOpaque ovflopaque;
184 uint32 *num_bucket;
185 char *data;
186 Size datalen PG_USED_FOR_ASSERTS_ONLY;
187 bool new_bmpage = false;
188
189 XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk);
190 XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk);
191
192 ovflbuf = XLogInitBufferForRedo(record, 0);
193 Assert(BufferIsValid(ovflbuf));
194
195 data = XLogRecGetBlockData(record, 0, &datalen);
196 num_bucket = (uint32 *) data;
197 Assert(datalen == sizeof(uint32));
198 _hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE,
199 true);
200 /* update backlink */
201 ovflpage = BufferGetPage(ovflbuf);
202 ovflopaque = HashPageGetOpaque(ovflpage);
203 ovflopaque->hasho_prevblkno = leftblk;
204
205 PageSetLSN(ovflpage, lsn);
206 MarkBufferDirty(ovflbuf);
207
208 if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
209 {
210 Page leftpage;
211 HashPageOpaque leftopaque;
212
213 leftpage = BufferGetPage(leftbuf);
214 leftopaque = HashPageGetOpaque(leftpage);
215 leftopaque->hasho_nextblkno = rightblk;
216
217 PageSetLSN(leftpage, lsn);
218 MarkBufferDirty(leftbuf);
219 }
220
221 if (BufferIsValid(leftbuf))
222 UnlockReleaseBuffer(leftbuf);
223 UnlockReleaseBuffer(ovflbuf);
224
225 /*
226 * Note: in normal operation, we'd update the bitmap and meta page while
227 * still holding lock on the overflow pages. But during replay it's not
228 * necessary to hold those locks, since no other index updates can be
229 * happening concurrently.
230 */
231 if (XLogRecHasBlockRef(record, 2))
232 {
233 Buffer mapbuffer;
234
235 if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO)
236 {
237 Page mappage = BufferGetPage(mapbuffer);
238 uint32 *freep = NULL;
239 uint32 *bitmap_page_bit;
240
241 freep = HashPageGetBitmap(mappage);
242
243 data = XLogRecGetBlockData(record, 2, &datalen);
244 bitmap_page_bit = (uint32 *) data;
245
246 SETBIT(freep, *bitmap_page_bit);
247
248 PageSetLSN(mappage, lsn);
249 MarkBufferDirty(mapbuffer);
250 }
251 if (BufferIsValid(mapbuffer))
252 UnlockReleaseBuffer(mapbuffer);
253 }
254
255 if (XLogRecHasBlockRef(record, 3))
256 {
257 Buffer newmapbuf;
258
259 newmapbuf = XLogInitBufferForRedo(record, 3);
260
261 _hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true);
262
263 new_bmpage = true;
264 newmapblk = BufferGetBlockNumber(newmapbuf);
265
266 MarkBufferDirty(newmapbuf);
267 PageSetLSN(BufferGetPage(newmapbuf), lsn);
268
269 UnlockReleaseBuffer(newmapbuf);
270 }
271
272 if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO)
273 {
274 HashMetaPage metap;
275 Page page;
276 uint32 *firstfree_ovflpage;
277
278 data = XLogRecGetBlockData(record, 4, &datalen);
279 firstfree_ovflpage = (uint32 *) data;
280
281 page = BufferGetPage(metabuf);
282 metap = HashPageGetMeta(page);
283 metap->hashm_firstfree = *firstfree_ovflpage;
284
285 if (!xlrec->bmpage_found)
286 {
287 metap->hashm_spares[metap->hashm_ovflpoint]++;
288
289 if (new_bmpage)
290 {
291 Assert(BlockNumberIsValid(newmapblk));
292
293 metap->hashm_mapp[metap->hashm_nmaps] = newmapblk;
294 metap->hashm_nmaps++;
295 metap->hashm_spares[metap->hashm_ovflpoint]++;
296 }
297 }
298
299 PageSetLSN(page, lsn);
300 MarkBufferDirty(metabuf);
301 }
302 if (BufferIsValid(metabuf))
303 UnlockReleaseBuffer(metabuf);
304}
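
Reading the block references back out of the replay logic above, an XLOG_HASH_ADD_OVFL_PAGE record has this shape (a summary of the code above, not a normative format description):

    /*
     * block 0: the new overflow page; data = bucket number (uint32)
     * block 1: the left sibling whose hasho_nextblkno now points right
     * block 2: an existing bitmap page, present only if a free bit was
     *          found there; data = bit number to SETBIT (uint32)
     * block 3: a newly added bitmap page, present only if one was needed
     * block 4: the metapage; data = new hashm_firstfree (uint32)
     */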
305
306/*
307 * replay allocation of page for split operation
308 */
309static void
310hash_xlog_split_allocate_page(XLogReaderState *record)
311{
312 XLogRecPtr lsn = record->EndRecPtr;
313 xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record);
314 Buffer oldbuf;
315 Buffer newbuf;
316 Buffer metabuf;
317 Size datalen PG_USED_FOR_ASSERTS_ONLY;
318 char *data;
319 XLogRedoAction action;
320
321 /*
322 * To be consistent with normal operation, here we take cleanup locks on
323 * both the old and new buckets even though there can't be any concurrent
324 * inserts.
325 */
326
327 /* replay the record for old bucket */
328 action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf);
329
330 /*
331 * Note that we still update the page even if it was restored from a full
332 * page image, because the special space is not included in the image.
333 */
334 if (action == BLK_NEEDS_REDO)
335 {
336 Page oldpage;
337 HashPageOpaque oldopaque;
338
339 oldpage = BufferGetPage(oldbuf);
340 oldopaque = HashPageGetOpaque(oldpage);
341
342 oldopaque->hasho_flag = xlrec->old_bucket_flag;
343 oldopaque->hasho_prevblkno = xlrec->new_bucket;
344
345 PageSetLSN(oldpage, lsn);
346 MarkBufferDirty(oldbuf);
347 }
348
349 /* replay the record for new bucket */
350 (void) XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_AND_CLEANUP_LOCK, true,
351 &newbuf);
352 _hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket,
353 xlrec->new_bucket_flag, true);
354 MarkBufferDirty(newbuf);
355 PageSetLSN(BufferGetPage(newbuf), lsn);
356
357 /*
358 * We could release the lock on the old bucket early as well, but we do it
359 * here to be consistent with normal operation.
360 */
361 if (BufferIsValid(oldbuf))
362 UnlockReleaseBuffer(oldbuf);
363 if (BufferIsValid(newbuf))
364 UnlockReleaseBuffer(newbuf);
365
366 /*
367 * Note: in normal operation, we'd update the meta page while still
368 * holding lock on the old and new bucket pages. But during replay it's
369 * not necessary to hold those locks, since no other bucket splits can be
370 * happening concurrently.
371 */
372
373 /* replay the record for metapage changes */
374 if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO)
375 {
376 Page page;
377 HashMetaPage metap;
378
379 page = BufferGetPage(metabuf);
380 metap = HashPageGetMeta(page);
381 metap->hashm_maxbucket = xlrec->new_bucket;
382
383 data = XLogRecGetBlockData(record, 2, &datalen);
384
385 if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS)
386 {
387 uint32 lowmask;
388 uint32 *highmask;
389
390 /* extract low and high masks. */
391 memcpy(&lowmask, data, sizeof(uint32));
392 highmask = (uint32 *) ((char *) data + sizeof(uint32));
393
394 /* update metapage */
395 metap->hashm_lowmask = lowmask;
396 metap->hashm_highmask = *highmask;
397
398 data += sizeof(uint32) * 2;
399 }
400
401 if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT)
402 {
403 uint32 ovflpoint;
404 uint32 *ovflpages;
405
406 /* extract information of overflow pages. */
407 memcpy(&ovflpoint, data, sizeof(uint32));
408 ovflpages = (uint32 *) ((char *) data + sizeof(uint32));
409
410 /* update metapage */
411 metap->hashm_spares[ovflpoint] = *ovflpages;
412 metap->hashm_ovflpoint = ovflpoint;
413 }
414
415 MarkBufferDirty(metabuf);
416 PageSetLSN(BufferGetPage(metabuf), lsn);
417 }
418
419 if (BufferIsValid(metabuf))
420 UnlockReleaseBuffer(metabuf);
421}
422
423/*
424 * replay of split operation
425 */
426static void
427hash_xlog_split_page(XLogReaderState *record)
428{
429 Buffer buf;
430
431 if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED)
432 elog(ERROR, "Hash split record did not contain a full-page image");
433
434 UnlockReleaseBuffer(buf);
435}
436
437/*
438 * replay completion of split operation
439 */
440static void
441hash_xlog_split_complete(XLogReaderState *record)
442{
443 XLogRecPtr lsn = record->EndRecPtr;
445 Buffer oldbuf;
444 xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record);
445 Buffer oldbuf;
446 Buffer newbuf;
447 XLogRedoAction action;
448
449 /* replay the record for old bucket */
450 action = XLogReadBufferForRedo(record, 0, &oldbuf);
451
452 /*
453 * Note that we still update the page even if it was restored from a full
454 * page image, because the bucket flag is not included in the image.
455 */
456 if (action == BLK_NEEDS_REDO)
457 {
458 Page oldpage;
459 HashPageOpaque oldopaque;
460
461 oldpage = BufferGetPage(oldbuf);
462 oldopaque = HashPageGetOpaque(oldpage);
463
464 oldopaque->hasho_flag = xlrec->old_bucket_flag;
465
466 PageSetLSN(oldpage, lsn);
467 MarkBufferDirty(oldbuf);
468 }
469 if (BufferIsValid(oldbuf))
470 UnlockReleaseBuffer(oldbuf);
471
472 /* replay the record for new bucket */
473 action = XLogReadBufferForRedo(record, 1, &newbuf);
474
475 /*
476 * Note that we still update the page even if it was restored from a full
477 * page image, because the bucket flag is not included in the image.
478 */
479 if (action == BLK_NEEDS_REDO)
480 {
481 Page newpage;
482 HashPageOpaque nopaque;
483
484 newpage = BufferGetPage(newbuf);
485 nopaque = HashPageGetOpaque(newpage);
486
487 nopaque->hasho_flag = xlrec->new_bucket_flag;
488
489 PageSetLSN(newpage, lsn);
490 MarkBufferDirty(newbuf);
491 }
492 if (BufferIsValid(newbuf))
493 UnlockReleaseBuffer(newbuf);
494}
495
496/*
497 * replay move of page contents for squeeze operation of hash index
498 */
499static void
500hash_xlog_move_page_contents(XLogReaderState *record)
501{
502 XLogRecPtr lsn = record->EndRecPtr;
503 xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record);
504 Buffer bucketbuf = InvalidBuffer;
505 Buffer writebuf = InvalidBuffer;
506 Buffer deletebuf = InvalidBuffer;
507 XLogRedoAction action;
508
509 /*
510 * Ensure we have a cleanup lock on primary bucket page before we start
511 * with the actual replay operation. This ensures that no scan can start,
512 * and that no scan can already be in progress, during the replay of this
513 * operation. If we allow scans during this operation, then they
514 * can miss some records or show the same record multiple times.
515 */
516 if (xldata->is_prim_bucket_same_wrt)
517 action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
518 else
519 {
520 /*
521 * we don't care about the return value, as the purpose of reading bucketbuf
522 * is to ensure a cleanup lock on primary bucket page.
523 */
524 (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
525
526 action = XLogReadBufferForRedo(record, 1, &writebuf);
527 }
528
529 /* replay the record for adding entries in overflow buffer */
530 if (action == BLK_NEEDS_REDO)
531 {
532 Page writepage;
533 char *begin;
534 char *data;
535 Size datalen;
536 uint16 ninserted = 0;
537
538 data = begin = XLogRecGetBlockData(record, 1, &datalen);
539
540 writepage = BufferGetPage(writebuf);
541
542 if (xldata->ntups > 0)
543 {
544 OffsetNumber *towrite = (OffsetNumber *) data;
545
546 data += sizeof(OffsetNumber) * xldata->ntups;
547
548 while (data - begin < datalen)
549 {
550 IndexTuple itup = (IndexTuple) data;
551 Size itemsz;
552 OffsetNumber l;
553
554 itemsz = IndexTupleSize(itup);
555 itemsz = MAXALIGN(itemsz);
556
557 data += itemsz;
558
559 l = PageAddItem(writepage, itup, itemsz, towrite[ninserted], false, false);
560 if (l == InvalidOffsetNumber)
561 elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %d bytes",
562 (int) itemsz);
563
564 ninserted++;
565 }
566 }
567
568 /*
569 * The number of tuples inserted must match the number requested by the REDO record.
570 */
571 Assert(ninserted == xldata->ntups);
572
573 PageSetLSN(writepage, lsn);
574 MarkBufferDirty(writebuf);
575 }
576
577 /* replay the record for deleting entries from overflow buffer */
578 if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO)
579 {
580 Page page;
581 char *ptr;
582 Size len;
583
584 ptr = XLogRecGetBlockData(record, 2, &len);
585
586 page = BufferGetPage(deletebuf);
587
588 if (len > 0)
589 {
590 OffsetNumber *unused;
591 OffsetNumber *unend;
592
593 unused = (OffsetNumber *) ptr;
594 unend = (OffsetNumber *) ((char *) ptr + len);
595
596 if ((unend - unused) > 0)
597 PageIndexMultiDelete(page, unused, unend - unused);
598 }
599
600 PageSetLSN(page, lsn);
601 MarkBufferDirty(deletebuf);
602 }
603
604 /*
605 * Replay is complete, now we can release the buffers. We release locks at
606 * end of replay operation to ensure that we hold lock on primary bucket
607 * page till end of operation. We could optimize by releasing the lock on
608 * the write buffer as soon as its part of the operation is complete, when
609 * it is not the same as the primary bucket page, but that doesn't seem worth
610 * complicating the code.
611 */
612 if (BufferIsValid(deletebuf))
613 UnlockReleaseBuffer(deletebuf);
614
615 if (BufferIsValid(writebuf))
616 UnlockReleaseBuffer(writebuf);
617
618 if (BufferIsValid(bucketbuf))
619 UnlockReleaseBuffer(bucketbuf);
620}
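
The decoding loop above depends on a simple payload layout for block 1: an array of xldata->ntups target offsets, followed by the MAXALIGN'd index tuples in the same order. Schematically (restating the code above, sizes illustrative):

    /*
     * Block 1 data:
     *
     *   OffsetNumber towrite[ntups];   -- target offset for each tuple
     *   IndexTupleData tuple0;         -- each tuple MAXALIGN'd
     *   IndexTupleData tuple1;
     *   ...
     *
     * "data - begin < datalen" bounds the walk, and ninserted is
     * cross-checked against xldata->ntups afterwards.
     */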
621
622/*
623 * replay squeeze page operation of hash index
624 */
625static void
626hash_xlog_squeeze_page(XLogReaderState *record)
627{
628 XLogRecPtr lsn = record->EndRecPtr;
629 xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record);
630 Buffer bucketbuf = InvalidBuffer;
631 Buffer writebuf = InvalidBuffer;
632 Buffer ovflbuf;
633 Buffer prevbuf = InvalidBuffer;
634 Buffer mapbuf;
635 XLogRedoAction action;
636
637 /*
638 * Ensure we have a cleanup lock on primary bucket page before we start
639 * with the actual replay operation. This ensures that no scan can start,
640 * and that no scan can already be in progress, during the replay of this
641 * operation. If we allow scans during this operation, then they
642 * can miss some records or show the same record multiple times.
643 */
644 if (xldata->is_prim_bucket_same_wrt)
645 action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
646 else
647 {
648 /*
649 * we don't care about the return value, as the purpose of reading bucketbuf
650 * is to ensure a cleanup lock on primary bucket page.
651 */
652 (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
653
654 if (xldata->ntups > 0 || xldata->is_prev_bucket_same_wrt)
655 action = XLogReadBufferForRedo(record, 1, &writebuf);
656 else
657 action = BLK_NOTFOUND;
658 }
659
660 /* replay the record for adding entries in overflow buffer */
661 if (action == BLK_NEEDS_REDO)
662 {
663 Page writepage;
664 char *begin;
665 char *data;
666 Size datalen;
667 uint16 ninserted = 0;
668 bool mod_wbuf = false;
669
670 data = begin = XLogRecGetBlockData(record, 1, &datalen);
671
672 writepage = BufferGetPage(writebuf);
673
674 if (xldata->ntups > 0)
675 {
676 OffsetNumber *towrite = (OffsetNumber *) data;
677
678 data += sizeof(OffsetNumber) * xldata->ntups;
679
680 while (data - begin < datalen)
681 {
682 IndexTuple itup = (IndexTuple) data;
683 Size itemsz;
684 OffsetNumber l;
685
686 itemsz = IndexTupleSize(itup);
687 itemsz = MAXALIGN(itemsz);
688
689 data += itemsz;
690
691 l = PageAddItem(writepage, itup, itemsz, towrite[ninserted], false, false);
692 if (l == InvalidOffsetNumber)
693 elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %d bytes",
694 (int) itemsz);
695
696 ninserted++;
697 }
698
699 mod_wbuf = true;
700 }
701 else
702 {
703 /*
704 * Ensure that the required flags are set when there are no
705 * tuples. See _hash_freeovflpage().
706 */
707 Assert(xldata->is_prim_bucket_same_wrt ||
708 xldata->is_prev_bucket_same_wrt);
709 }
710
711 /*
712 * The number of tuples inserted must match the number requested by the REDO record.
713 */
714 Assert(ninserted == xldata->ntups);
715
716 /*
717 * If the page we are adding tuples to is the page previous to the freed
718 * overflow page, then update its nextblkno.
719 */
720 if (xldata->is_prev_bucket_same_wrt)
721 {
722 HashPageOpaque writeopaque = HashPageGetOpaque(writepage);
723
724 writeopaque->hasho_nextblkno = xldata->nextblkno;
725 mod_wbuf = true;
726 }
727
728 /* Set LSN and mark writebuf dirty iff it is modified */
729 if (mod_wbuf)
730 {
731 PageSetLSN(writepage, lsn);
732 MarkBufferDirty(writebuf);
733 }
734 }
735
736 /* replay the record for initializing overflow buffer */
737 if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO)
738 {
739 Page ovflpage;
740 HashPageOpaque ovflopaque;
741
742 ovflpage = BufferGetPage(ovflbuf);
743
744 _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
745
746 ovflopaque = HashPageGetOpaque(ovflpage);
747
748 ovflopaque->hasho_prevblkno = InvalidBlockNumber;
749 ovflopaque->hasho_nextblkno = InvalidBlockNumber;
750 ovflopaque->hasho_bucket = InvalidBucket;
751 ovflopaque->hasho_flag = LH_UNUSED_PAGE;
752 ovflopaque->hasho_page_id = HASHO_PAGE_ID;
753
754 PageSetLSN(ovflpage, lsn);
755 MarkBufferDirty(ovflbuf);
756 }
757 if (BufferIsValid(ovflbuf))
758 UnlockReleaseBuffer(ovflbuf);
759
760 /* replay the record for page previous to the freed overflow page */
761 if (!xldata->is_prev_bucket_same_wrt &&
762 XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO)
763 {
764 Page prevpage = BufferGetPage(prevbuf);
765 HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
766
767 prevopaque->hasho_nextblkno = xldata->nextblkno;
768
769 PageSetLSN(prevpage, lsn);
770 MarkBufferDirty(prevbuf);
771 }
772 if (BufferIsValid(prevbuf))
773 UnlockReleaseBuffer(prevbuf);
774
775 /* replay the record for page next to the freed overflow page */
776 if (XLogRecHasBlockRef(record, 4))
777 {
778 Buffer nextbuf;
779
780 if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO)
781 {
782 Page nextpage = BufferGetPage(nextbuf);
783 HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
784
785 nextopaque->hasho_prevblkno = xldata->prevblkno;
786
787 PageSetLSN(nextpage, lsn);
788 MarkBufferDirty(nextbuf);
789 }
790 if (BufferIsValid(nextbuf))
791 UnlockReleaseBuffer(nextbuf);
792 }
793
794 if (BufferIsValid(writebuf))
795 UnlockReleaseBuffer(writebuf);
796
797 if (BufferIsValid(bucketbuf))
798 UnlockReleaseBuffer(bucketbuf);
799
800 /*
801 * Note: in normal operation, we'd update the bitmap and meta page while
802 * still holding lock on the primary bucket page and overflow pages. But
803 * during replay it's not necessary to hold those locks, since no other
804 * index updates can be happening concurrently.
805 */
806 /* replay the record for bitmap page */
807 if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO)
808 {
809 Page mappage = BufferGetPage(mapbuf);
810 uint32 *freep = NULL;
811 char *data;
812 uint32 *bitmap_page_bit;
813 Size datalen;
814
815 freep = HashPageGetBitmap(mappage);
816
817 data = XLogRecGetBlockData(record, 5, &datalen);
818 bitmap_page_bit = (uint32 *) data;
819
820 CLRBIT(freep, *bitmap_page_bit);
821
822 PageSetLSN(mappage, lsn);
823 MarkBufferDirty(mapbuf);
824 }
825 if (BufferIsValid(mapbuf))
826 UnlockReleaseBuffer(mapbuf);
827
828 /* replay the record for meta page */
829 if (XLogRecHasBlockRef(record, 6))
830 {
831 Buffer metabuf;
832
833 if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO)
834 {
835 HashMetaPage metap;
836 Page page;
837 char *data;
838 uint32 *firstfree_ovflpage;
839 Size datalen;
840
841 data = XLogRecGetBlockData(record, 6, &datalen);
842 firstfree_ovflpage = (uint32 *) data;
843
844 page = BufferGetPage(metabuf);
845 metap = HashPageGetMeta(page);
846 metap->hashm_firstfree = *firstfree_ovflpage;
847
848 PageSetLSN(page, lsn);
849 MarkBufferDirty(metabuf);
850 }
851 if (BufferIsValid(metabuf))
852 UnlockReleaseBuffer(metabuf);
853 }
854}
855
856/*
857 * replay delete operation of hash index
858 */
859static void
860hash_xlog_delete(XLogReaderState *record)
861{
862 XLogRecPtr lsn = record->EndRecPtr;
863 xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record);
864 Buffer bucketbuf = InvalidBuffer;
865 Buffer deletebuf;
866 Page page;
867 XLogRedoAction action;
868
869 /*
870 * Ensure we have a cleanup lock on primary bucket page before we start
871 * with the actual replay operation. This ensures that no scan can start,
872 * and that no scan can already be in progress, during the replay of this
873 * operation. If we allow scans during this operation, then they
874 * can miss some records or show the same record multiple times.
875 */
876 if (xldata->is_primary_bucket_page)
877 action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf);
878 else
879 {
880 /*
881 * we don't care about the return value, as the purpose of reading bucketbuf
882 * is to ensure a cleanup lock on primary bucket page.
883 */
884 (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
885
886 action = XLogReadBufferForRedo(record, 1, &deletebuf);
887 }
888
889 /* replay the record for deleting entries in bucket page */
890 if (action == BLK_NEEDS_REDO)
891 {
892 char *ptr;
893 Size len;
894
895 ptr = XLogRecGetBlockData(record, 1, &len);
896
897 page = BufferGetPage(deletebuf);
898
899 if (len > 0)
900 {
901 OffsetNumber *unused;
902 OffsetNumber *unend;
903
904 unused = (OffsetNumber *) ptr;
905 unend = (OffsetNumber *) ((char *) ptr + len);
906
907 if ((unend - unused) > 0)
908 PageIndexMultiDelete(page, unused, unend - unused);
909 }
910
911 /*
912 * Mark the page as not containing any LP_DEAD items only if
913 * clear_dead_marking flag is set to true. See comments in
914 * hashbucketcleanup() for details.
915 */
916 if (xldata->clear_dead_marking)
917 {
918 HashPageOpaque pageopaque;
919
920 pageopaque = HashPageGetOpaque(page);
921 pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
922 }
923
924 PageSetLSN(page, lsn);
925 MarkBufferDirty(deletebuf);
926 }
927 if (BufferIsValid(deletebuf))
928 UnlockReleaseBuffer(deletebuf);
929
930 if (BufferIsValid(bucketbuf))
931 UnlockReleaseBuffer(bucketbuf);
932}
933
934/*
935 * replay split cleanup flag operation for primary bucket page.
936 */
937static void
938hash_xlog_split_cleanup(XLogReaderState *record)
939{
940 XLogRecPtr lsn = record->EndRecPtr;
941 Buffer buffer;
942 Page page;
943
944 if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
945 {
946 HashPageOpaque bucket_opaque;
947
948 page = BufferGetPage(buffer);
949
950 bucket_opaque = HashPageGetOpaque(page);
951 bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
952 PageSetLSN(page, lsn);
953 MarkBufferDirty(buffer);
954 }
955 if (BufferIsValid(buffer))
956 UnlockReleaseBuffer(buffer);
957}
958
959/*
960 * replay for update meta page
961 */
962static void
963hash_xlog_update_meta_page(XLogReaderState *record)
964{
965 HashMetaPage metap;
966 XLogRecPtr lsn = record->EndRecPtr;
967 xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record);
968 Buffer metabuf;
969 Page page;
970
971 if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
972 {
973 page = BufferGetPage(metabuf);
974 metap = HashPageGetMeta(page);
975
976 metap->hashm_ntuples = xldata->ntuples;
977
978 PageSetLSN(page, lsn);
979 MarkBufferDirty(metabuf);
980 }
981 if (BufferIsValid(metabuf))
982 UnlockReleaseBuffer(metabuf);
983}
984
985/*
986 * replay delete operation in hash index to remove
987 * tuples marked as DEAD during index tuple insertion.
988 */
989static void
990hash_xlog_vacuum_one_page(XLogReaderState *record)
991{
992 XLogRecPtr lsn = record->EndRecPtr;
993 xl_hash_vacuum_one_page *xldata;
994 Buffer buffer;
995 Buffer metabuf;
996 Page page;
997 XLogRedoAction action;
998 HashPageOpaque pageopaque;
999 OffsetNumber *toDelete;
1000
1001 xldata = (xl_hash_vacuum_one_page *) XLogRecGetData(record);
1002 toDelete = xldata->offsets;
1003
1004 /*
1005 * If we have any conflict processing to do, it must happen before we
1006 * update the page.
1007 *
1008 * Hash index records that are marked as LP_DEAD and being removed during
1009 * hash index tuple insertion can conflict with standby queries. You might
1010 * think that vacuum records would conflict as well, but we've handled
1011 * that already. XLOG_HEAP2_PRUNE_VACUUM_SCAN records provide the highest
1012 * xid cleaned by the vacuum of the heap and so we can resolve any
1013 * conflicts just once when that arrives. After that we know that no
1014 * conflicts exist from individual hash index vacuum records on that
1015 * index.
1016 */
1017 if (InHotStandby)
1018 {
1019 RelFileLocator rlocator;
1020
1021 XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
1022 ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
1023 xldata->isCatalogRel,
1024 rlocator);
1025 }
1026
1027 action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer);
1028
1029 if (action == BLK_NEEDS_REDO)
1030 {
1031 page = BufferGetPage(buffer);
1032
1033 PageIndexMultiDelete(page, toDelete, xldata->ntuples);
1034
1035 /*
1036 * Mark the page as not containing any LP_DEAD items. See comments in
1037 * _hash_vacuum_one_page() for details.
1038 */
1039 pageopaque = HashPageGetOpaque(page);
1040 pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1041
1042 PageSetLSN(page, lsn);
1043 MarkBufferDirty(buffer);
1044 }
1045 if (BufferIsValid(buffer))
1046 UnlockReleaseBuffer(buffer);
1047
1048 if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
1049 {
1050 Page metapage;
1051 HashMetaPage metap;
1052
1053 metapage = BufferGetPage(metabuf);
1054 metap = HashPageGetMeta(metapage);
1055
1056 metap->hashm_ntuples -= xldata->ntuples;
1057
1058 PageSetLSN(metapage, lsn);
1059 MarkBufferDirty(metabuf);
1060 }
1061 if (BufferIsValid(metabuf))
1062 UnlockReleaseBuffer(metabuf);
1063}
1064
1065void
1066hash_redo(XLogReaderState *record)
1067{
1068 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1069
1070 switch (info)
1071 {
1072 case XLOG_HASH_INIT_META_PAGE:
1073 hash_xlog_init_meta_page(record);
1074 break;
1075 case XLOG_HASH_INIT_BITMAP_PAGE:
1076 hash_xlog_init_bitmap_page(record);
1077 break;
1078 case XLOG_HASH_INSERT:
1079 hash_xlog_insert(record);
1080 break;
1081 case XLOG_HASH_ADD_OVFL_PAGE:
1082 hash_xlog_add_ovfl_page(record);
1083 break;
1084 case XLOG_HASH_SPLIT_ALLOCATE_PAGE:
1085 hash_xlog_split_allocate_page(record);
1086 break;
1087 case XLOG_HASH_SPLIT_PAGE:
1088 hash_xlog_split_page(record);
1089 break;
1090 case XLOG_HASH_SPLIT_COMPLETE:
1091 hash_xlog_split_complete(record);
1092 break;
1093 case XLOG_HASH_MOVE_PAGE_CONTENTS:
1094 hash_xlog_move_page_contents(record);
1095 break;
1096 case XLOG_HASH_SQUEEZE_PAGE:
1097 hash_xlog_squeeze_page(record);
1098 break;
1099 case XLOG_HASH_DELETE:
1100 hash_xlog_delete(record);
1101 break;
1102 case XLOG_HASH_SPLIT_CLEANUP:
1103 hash_xlog_split_cleanup(record);
1104 break;
1105 case XLOG_HASH_UPDATE_META_PAGE:
1106 hash_xlog_update_meta_page(record);
1107 break;
1108 case XLOG_HASH_VACUUM_ONE_PAGE:
1109 hash_xlog_vacuum_one_page(record);
1110 break;
1111 default:
1112 elog(PANIC, "hash_redo: unknown op code %u", info);
1113 }
1114}
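
hash_redo() and hash_mask() are not called directly; recovery dispatches to them through the resource-manager table. The hash entry in src/include/access/rmgrlist.h has approximately this shape (argument order after the two name fields: redo, desc, identify, startup, cleanup, mask, decode):

    PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, hash_identify,
            NULL, NULL, hash_mask, NULL)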
1115
1116/*
1117 * Mask a hash page before performing consistency checks on it.
1118 */
1119void
1120hash_mask(char *pagedata, BlockNumber blkno)
1121{
1122 Page page = (Page) pagedata;
1123 HashPageOpaque opaque;
1124 int pagetype;
1125
1126 mask_page_lsn_and_checksum(page);
1127
1128 mask_page_hint_bits(page);
1129 mask_unused_space(page);
1130
1131 opaque = HashPageGetOpaque(page);
1132
1133 pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
1134 if (pagetype == LH_UNUSED_PAGE)
1135 {
1136 /*
1137 * Mask everything on a UNUSED page.
1138 */
1139 mask_page_content(page);
1140 }
1141 else if (pagetype == LH_BUCKET_PAGE ||
1142 pagetype == LH_OVERFLOW_PAGE)
1143 {
1144 /*
1145 * In hash bucket and overflow pages, it is possible to modify the
1146 * LP_FLAGS without emitting any WAL record. Hence, mask the line
1147 * pointer flags. See hashgettuple(), _hash_kill_items() for details.
1148 */
1149 mask_lp_flags(page);
1150 }
1151
1152 /*
1153 * It is possible that the hint bit LH_PAGE_HAS_DEAD_TUPLES may remain
1154 * unlogged. So, mask it. See _hash_kill_items() for details.
1155 */
1156 opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1157}
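
hash_mask() is exercised only when wal_consistency_checking covers the hash resource manager: after replaying each record, recovery masks both the full-page image carried in WAL and the page produced by replay, then compares them byte for byte. A condensed sketch of that consumer, with illustrative variable names (cf. verifyBackupPageConsistency()):

    /* mask both copies so intentionally-unlogged state (hint bits,
     * LP_DEAD flags, unused space) cannot cause false mismatches */
    if (rmgr->rm_mask)
    {
        rmgr->rm_mask(replay_image_masked, blkno);
        rmgr->rm_mask(primary_image_masked, blkno);
    }

    if (memcmp(replay_image_masked, primary_image_masked, BLCKSZ) != 0)
        elog(FATAL, "inconsistent page found, blkno %u", blkno);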