@@ -391,6 +391,8 @@ _hash_firstfreebit(uint32 map)
  * Remove this overflow page from its bucket's chain, and mark the page as
  * free.  On entry, ovflbuf is write-locked; it is released before exiting.
  *
+ * Add the tuples (itups) to wbuf.
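+ * The itups, itup_offsets and tups_size arrays describe the nitups tuples
+ * to be added to wbuf, in hashkey order.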
+ *
  * Since this function is invoked in VACUUM, we provide an access strategy
  * parameter that controls fetches of the bucket pages.
  *
@@ -403,13 +405,16 @@ _hash_firstfreebit(uint32 map)
  * has a lock on same.
  */
 BlockNumber
-_hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
+_hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
+				   Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets,
+				   Size *tups_size, uint16 nitups,
 				   BufferAccessStrategy bstrategy)
 {
 	HashMetaPage metap;
 	Buffer		metabuf;
 	Buffer		mapbuf;
 	Buffer		prevbuf = InvalidBuffer;
+	Buffer		nextbuf = InvalidBuffer;
 	BlockNumber ovflblkno;
 	BlockNumber prevblkno;
 	BlockNumber blkno;
@@ -434,15 +439,6 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 	writeblkno = BufferGetBlockNumber(wbuf);
 	bucket = ovflopaque->hasho_bucket;
 
-	/*
-	 * Zero the page for debugging's sake; then write and release it. (Note:
-	 * if we failed to zero the page here, we'd have problems with the Assert
-	 * in _hash_pageinit() when the page is reused.)
-	 */
-	MemSet(ovflpage, 0, BufferGetPageSize(ovflbuf));
-	MarkBufferDirty(ovflbuf);
-	_hash_relbuf(rel, ovflbuf);
-
 	/*
 	 * Fix up the bucket chain.  This is a doubly-linked list, so we must fix
 	 * up the bucket chain members behind and ahead of the overflow page being
@@ -451,9 +447,6 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 	 */
 	if (BlockNumberIsValid(prevblkno))
 	{
-		Page		prevpage;
-		HashPageOpaque prevopaque;
-
 		if (prevblkno == writeblkno)
 			prevbuf = wbuf;
 		else
@@ -462,32 +455,13 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 											 HASH_WRITE,
 											 LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
 											 bstrategy);
-
-		prevpage = BufferGetPage(prevbuf);
-		prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);
-
-		Assert(prevopaque->hasho_bucket == bucket);
-		prevopaque->hasho_nextblkno = nextblkno;
-
-		MarkBufferDirty(prevbuf);
-		if (prevblkno != writeblkno)
-			_hash_relbuf(rel, prevbuf);
 	}
 	if (BlockNumberIsValid(nextblkno))
-	{
-		Buffer		nextbuf = _hash_getbuf_with_strategy(rel,
-														 nextblkno,
-														 HASH_WRITE,
-														 LH_OVERFLOW_PAGE,
-														 bstrategy);
-		Page		nextpage = BufferGetPage(nextbuf);
-		HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);
-
-		Assert(nextopaque->hasho_bucket == bucket);
-		nextopaque->hasho_prevblkno = prevblkno;
-		MarkBufferDirty(nextbuf);
-		_hash_relbuf(rel, nextbuf);
-	}
+		nextbuf = _hash_getbuf_with_strategy(rel,
+											 nextblkno,
+											 HASH_WRITE,
+											 LH_OVERFLOW_PAGE,
+											 bstrategy);
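+
+	/*
+	 * At this point both neighbors of the overflow page (if any) are pinned
+	 * and write-locked; the chain is not unlinked until after the
+	 * accumulated tuples have been added to the "write" page below.
+	 */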
 
 	/* Note: bstrategy is intentionally not used for metapage and bitmap */
 
@@ -508,24 +482,71 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 	/* Release metapage lock while we access the bitmap page */
 	LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
 
-	/* Clear the bitmap bit to indicate that this overflow page is free */
+	/* read the bitmap page to clear the bitmap bit */
 	mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BITMAP_PAGE);
 	mappage = BufferGetPage(mapbuf);
 	freep = HashPageGetBitmap(mappage);
 	Assert(ISSET(freep, bitmapbit));
-	CLRBIT(freep, bitmapbit);
-	MarkBufferDirty(mapbuf);
-	_hash_relbuf(rel, mapbuf);
 
 	/* Get write-lock on metapage to update firstfree */
 	LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
 
+	/*
+	 * we have to insert tuples on the "write" page, being careful to preserve
+	 * hashkey ordering.  (If we insert many tuples into the same "write" page
+	 * it would be worth qsort'ing them).
+	 */
+	if (nitups > 0)
+	{
+		_hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
+		MarkBufferDirty(wbuf);
+	}
+
+	/* Initialize the freed overflow page. */
+	_hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
+	MarkBufferDirty(ovflbuf);
+
+	if (BufferIsValid(prevbuf))
+	{
+		Page		prevpage = BufferGetPage(prevbuf);
+		HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);
+
+		Assert(prevopaque->hasho_bucket == bucket);
+		prevopaque->hasho_nextblkno = nextblkno;
+		MarkBufferDirty(prevbuf);
+	}
+	if (BufferIsValid(nextbuf))
+	{
+		Page		nextpage = BufferGetPage(nextbuf);
+		HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);
+
+		Assert(nextopaque->hasho_bucket == bucket);
+		nextopaque->hasho_prevblkno = prevblkno;
+		MarkBufferDirty(nextbuf);
+	}
+
+	/* Clear the bitmap bit to indicate that this overflow page is free */
+	CLRBIT(freep, bitmapbit);
+	MarkBufferDirty(mapbuf);
+
 	/* if this is now the first free page, update hashm_firstfree */
 	if (ovflbitno < metap->hashm_firstfree)
 	{
 		metap->hashm_firstfree = ovflbitno;
 		MarkBufferDirty(metabuf);
 	}
+
+	/* release the previous page's buffer if it is not the write page */
+	if (BufferIsValid(prevbuf) && prevblkno != writeblkno)
+		_hash_relbuf(rel, prevbuf);
+
+	if (BufferIsValid(ovflbuf))
+		_hash_relbuf(rel, ovflbuf);
+
+	if (BufferIsValid(nextbuf))
+		_hash_relbuf(rel, nextbuf);
+
+	_hash_relbuf(rel, mapbuf);
 	_hash_relbuf(rel, metabuf);
 
 	return nextblkno;
@@ -640,7 +661,6 @@ _hash_squeezebucket(Relation rel,
 	Page		rpage;
 	HashPageOpaque wopaque;
 	HashPageOpaque ropaque;
-	bool		wbuf_dirty;
 
 	/*
 	 * start squeezing into the primary bucket page.
@@ -686,15 +706,21 @@ _hash_squeezebucket(Relation rel,
 	/*
 	 * squeeze the tuples.
 	 */
-	wbuf_dirty = false;
 	for (;;)
 	{
 		OffsetNumber roffnum;
 		OffsetNumber maxroffnum;
 		OffsetNumber deletable[MaxOffsetNumber];
-		int			ndeletable = 0;
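+		/* arrays sized for the worst case: every tuple on the read page moves */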
+		IndexTuple	itups[MaxIndexTuplesPerPage];
+		Size		tups_size[MaxIndexTuplesPerPage];
+		OffsetNumber itup_offsets[MaxIndexTuplesPerPage];
+		uint16		ndeletable = 0;
+		uint16		nitups = 0;
+		Size		all_tups_size = 0;
+		int			i;
 		bool		retain_pin = false;
 
+readpage:
 		/* Scan each tuple in "read" page */
 		maxroffnum = PageGetMaxOffsetNumber(rpage);
 		for (roffnum = FirstOffsetNumber;
@@ -715,11 +741,13 @@ _hash_squeezebucket(Relation rel,
 
 			/*
 			 * Walk up the bucket chain, looking for a page big enough for
-			 * this item.  Exit if we reach the read page.
+			 * this item and all other accumulated items.  Exit if we reach
+			 * the read page.
 			 */
-			while (PageGetFreeSpace(wpage) < itemsz)
+			while (PageGetFreeSpaceForMultipleTuples(wpage, nitups + 1) < (all_tups_size + itemsz))
 			{
 				Buffer		next_wbuf = InvalidBuffer;
+				bool		tups_moved = false;
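+
+				/*
+				 * tups_moved is set once the accumulated tuples have been
+				 * flushed to the old "write" page and deleted from the read
+				 * page; that deletion compacts the read page, which must
+				 * then be rescanned from the top (see the readpage label).
+				 */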
 
 				Assert(!PageIsEmpty(wpage));
 
@@ -737,12 +765,30 @@ _hash_squeezebucket(Relation rel,
 													   LH_OVERFLOW_PAGE,
 													   bstrategy);
 
+					if (nitups > 0)
+					{
+						Assert(nitups == ndeletable);
+
+						/*
+						 * we have to insert tuples on the "write" page, being
+						 * careful to preserve hashkey ordering.  (If we insert
+						 * many tuples into the same "write" page it would be
+						 * worth qsort'ing them).
+						 */
+						_hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
+						MarkBufferDirty(wbuf);
+
+						/* Delete tuples we already moved off read page */
+						PageIndexMultiDelete(rpage, deletable, ndeletable);
+						MarkBufferDirty(rbuf);
+
+						tups_moved = true;
+					}
+
 				/*
 				 * release the lock on previous page after acquiring the lock
 				 * on next page
 				 */
-				if (wbuf_dirty)
-					MarkBufferDirty(wbuf);
 				if (retain_pin)
 					LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
 				else
@@ -751,12 +797,6 @@ _hash_squeezebucket(Relation rel,
 				/* nothing more to do if we reached the read page */
 				if (rblkno == wblkno)
 				{
-					if (ndeletable > 0)
-					{
-						/* Delete tuples we already moved off read page */
-						PageIndexMultiDelete(rpage, deletable, ndeletable);
-						MarkBufferDirty(rbuf);
-					}
 					_hash_relbuf(rel, rbuf);
 					return;
 				}
@@ -765,21 +805,34 @@ _hash_squeezebucket(Relation rel,
 				wpage = BufferGetPage(wbuf);
 				wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
 				Assert(wopaque->hasho_bucket == bucket);
-				wbuf_dirty = false;
 				retain_pin = false;
-			}
 
-			/*
-			 * we have found room so insert on the "write" page, being careful
-			 * to preserve hashkey ordering.  (If we insert many tuples into
-			 * the same "write" page it would be worth qsort'ing instead of
-			 * doing repeated _hash_pgaddtup.)
-			 */
-			(void) _hash_pgaddtup(rel, wbuf, itemsz, itup);
-			wbuf_dirty = true;
+				/* be tidy */
+				for (i = 0; i < nitups; i++)
+					pfree(itups[i]);
+				nitups = 0;
+				all_tups_size = 0;
+				ndeletable = 0;
+
+				/*
+				 * after moving the tuples, rpage would have been compacted,
+				 * so we need to rescan it.
+				 */
+				if (tups_moved)
+					goto readpage;
+			}
 
 			/* remember tuple for deletion from "read" page */
 			deletable[ndeletable++] = roffnum;
+
+			/*
+			 * we need a copy of the index tuples, since the originals are
+			 * freed along with the overflow page, but they are still needed
+			 * to write a WAL record in _hash_freeovflpage.
+			 */
+			itups[nitups] = CopyIndexTuple(itup);
+			tups_size[nitups++] = itemsz;
+			all_tups_size += itemsz;
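+
+			/*
+			 * The while loop above ensured that the accumulated tuples,
+			 * including this one, fit on the current "write" page; the
+			 * actual insertion is deferred to _hash_pgaddmultitup.
+			 */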
 		}
 
 		/*
@@ -797,10 +850,12 @@ _hash_squeezebucket(Relation rel,
 		Assert(BlockNumberIsValid(rblkno));
 
 		/* free this overflow page (releases rbuf) */
-		_hash_freeovflpage(rel, rbuf, wbuf, bstrategy);
+		_hash_freeovflpage(rel, bucket_buf, rbuf, wbuf, itups, itup_offsets,
+						   tups_size, nitups, bstrategy);
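+		/* (the accumulated tuples were added to wbuf by _hash_freeovflpage) */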
 
-		if (wbuf_dirty)
-			MarkBufferDirty(wbuf);
+		/* be tidy */
+		for (i = 0; i < nitups; i++)
+			pfree(itups[i]);
 
 		/* are we freeing the page adjacent to wbuf? */
 		if (rblkno == wblkno)