@@ -40,12 +40,9 @@ static void _hash_splitbucket(Relation rel, Buffer metabuf,
4040 Bucket obucket , Bucket nbucket ,
4141 Buffer obuf ,
4242 Buffer nbuf ,
43+ HTAB * htab ,
4344 uint32 maxbucket ,
4445 uint32 highmask , uint32 lowmask );
45- static void _hash_splitbucket_guts (Relation rel , Buffer metabuf ,
46- Bucket obucket , Bucket nbucket , Buffer obuf ,
47- Buffer nbuf , HTAB * htab , uint32 maxbucket ,
48- uint32 highmask , uint32 lowmask );
4946
5047
5148/*
@@ -497,7 +494,9 @@ _hash_expandtable(Relation rel, Buffer metabuf)
497494 Buffer buf_nblkno ;
498495 Buffer buf_oblkno ;
499496 Page opage ;
497+ Page npage ;
500498 HashPageOpaque oopaque ;
499+ HashPageOpaque nopaque ;
501500 uint32 maxbucket ;
502501 uint32 highmask ;
503502 uint32 lowmask ;
@@ -685,18 +684,18 @@ _hash_expandtable(Relation rel, Buffer metabuf)
685684 goto fail ;
686685 }
687686
688-
689687 /*
690- * Okay to proceed with split. Update the metapage bucket mapping info.
691- *
692- * Since we are scribbling on the metapage data right in the shared
693- * buffer, any failure in this next little bit leaves us with a big
688+ * Since we are scribbling on the pages in the shared buffers, any failure
689+ * in this next bit of code leaves us with a big
694690 * problem: the metapage is effectively corrupt but could get written back
695691 * to disk. We don't really expect any failure, but just to be sure,
696692 * establish a critical section.
697693 */
698694 START_CRIT_SECTION ();
699695
696+ /*
697+ * Okay to proceed with split. Update the metapage bucket mapping info.
698+ */
700699 metap -> hashm_maxbucket = new_bucket ;
701700
702701 if (new_bucket > metap -> hashm_highmask )
@@ -718,8 +717,7 @@ _hash_expandtable(Relation rel, Buffer metabuf)
718717 metap -> hashm_ovflpoint = spare_ndx ;
719718 }
720719
721- /* Done mucking with metapage */
722- END_CRIT_SECTION ();
720+ MarkBufferDirty (metabuf );
723721
724722 /*
725723 * Copy bucket mapping info now; this saves re-accessing the meta page
@@ -732,16 +730,51 @@ _hash_expandtable(Relation rel, Buffer metabuf)
732730 highmask = metap -> hashm_highmask ;
733731 lowmask = metap -> hashm_lowmask ;
734732
735- /* Write out the metapage and drop lock, but keep pin */
736- MarkBufferDirty (metabuf );
733+ opage = BufferGetPage (buf_oblkno );
734+ oopaque = (HashPageOpaque ) PageGetSpecialPointer (opage );
735+
736+ /*
737+ * Mark the old bucket to indicate that split is in progress. (At
738+ * operation end, we will clear the split-in-progress flag.) Also,
739+ * for a primary bucket page, hasho_prevblkno stores the number of
740+ * buckets that existed as of the last split, so we must update that
741+ * value here.
742+ */
743+ oopaque -> hasho_flag |= LH_BUCKET_BEING_SPLIT ;
744+ oopaque -> hasho_prevblkno = maxbucket ;
745+
746+ MarkBufferDirty (buf_oblkno );
747+
748+ npage = BufferGetPage (buf_nblkno );
749+
750+ /*
751+ * initialize the new bucket's primary page and mark it to indicate that
752+ * split is in progress.
753+ */
754+ nopaque = (HashPageOpaque ) PageGetSpecialPointer (npage );
755+ nopaque -> hasho_prevblkno = maxbucket ;
756+ nopaque -> hasho_nextblkno = InvalidBlockNumber ;
757+ nopaque -> hasho_bucket = new_bucket ;
758+ nopaque -> hasho_flag = LH_BUCKET_PAGE | LH_BUCKET_BEING_POPULATED ;
759+ nopaque -> hasho_page_id = HASHO_PAGE_ID ;
760+
761+ MarkBufferDirty (buf_nblkno );
762+
763+ END_CRIT_SECTION ();
764+
765+ /* drop lock, but keep pin */
737766 LockBuffer (metabuf , BUFFER_LOCK_UNLOCK );
738767
739768 /* Relocate records to the new bucket */
740769 _hash_splitbucket (rel , metabuf ,
741770 old_bucket , new_bucket ,
742- buf_oblkno , buf_nblkno ,
771+ buf_oblkno , buf_nblkno , NULL ,
743772 maxbucket , highmask , lowmask );
744773
774+ /* all done, now release the locks and pins on primary buckets. */
775+ _hash_relbuf (rel , buf_oblkno );
776+ _hash_relbuf (rel , buf_nblkno );
777+
745778 return ;
746779
747780 /* Here if decide not to split or fail to acquire old bucket lock */
@@ -803,10 +836,16 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
803836/*
804837 * _hash_splitbucket -- split 'obucket' into 'obucket' and 'nbucket'
805838 *
839+ * This routine partitions the tuples between the old and new buckets, and
840+ * is also used to finish an incomplete split operation. To finish a
841+ * previously interrupted split, the caller must fill htab. If htab is
842+ * set, we skip moving the tuples that exist in htab; a NULL htab means
843+ * that all the tuples that belong to the new bucket are to be
844+ * moved.
845+ *
806846 * We are splitting a bucket that consists of a base bucket page and zero
807847 * or more overflow (bucket chain) pages. We must relocate tuples that
808- * belong in the new bucket, and compress out any free space in the old
809- * bucket.
848+ * belong in the new bucket.
810849 *
811850 * The caller must hold cleanup locks on both buckets to ensure that
812851 * no one else is trying to access them (see README).
@@ -832,72 +871,10 @@ _hash_splitbucket(Relation rel,
832871 Bucket nbucket ,
833872 Buffer obuf ,
834873 Buffer nbuf ,
874+ HTAB * htab ,
835875 uint32 maxbucket ,
836876 uint32 highmask ,
837877 uint32 lowmask )
838- {
839- Page opage ;
840- Page npage ;
841- HashPageOpaque oopaque ;
842- HashPageOpaque nopaque ;
843-
844- opage = BufferGetPage (obuf );
845- oopaque = (HashPageOpaque ) PageGetSpecialPointer (opage );
846-
847- /*
848- * Mark the old bucket to indicate that split is in progress. (At
849- * operation end, we will clear the split-in-progress flag.) Also,
850- * for a primary bucket page, hasho_prevblkno stores the number of
851- * buckets that existed as of the last split, so we must update that
852- * value here.
853- */
854- oopaque -> hasho_flag |= LH_BUCKET_BEING_SPLIT ;
855- oopaque -> hasho_prevblkno = maxbucket ;
856-
857- npage = BufferGetPage (nbuf );
858-
859- /*
860- * initialize the new bucket's primary page and mark it to indicate that
861- * split is in progress.
862- */
863- nopaque = (HashPageOpaque ) PageGetSpecialPointer (npage );
864- nopaque -> hasho_prevblkno = maxbucket ;
865- nopaque -> hasho_nextblkno = InvalidBlockNumber ;
866- nopaque -> hasho_bucket = nbucket ;
867- nopaque -> hasho_flag = LH_BUCKET_PAGE | LH_BUCKET_BEING_POPULATED ;
868- nopaque -> hasho_page_id = HASHO_PAGE_ID ;
869-
870- _hash_splitbucket_guts (rel , metabuf , obucket ,
871- nbucket , obuf , nbuf , NULL ,
872- maxbucket , highmask , lowmask );
873-
874- /* all done, now release the locks and pins on primary buckets. */
875- _hash_relbuf (rel , obuf );
876- _hash_relbuf (rel , nbuf );
877- }
878-
879- /*
880- * _hash_splitbucket_guts -- Helper function to perform the split operation
881- *
882- * This routine is used to partition the tuples between old and new bucket and
883- * to finish incomplete split operations. To finish the previously
884- * interrupted split operation, caller needs to fill htab. If htab is set, then
885- * we skip the movement of tuples that exists in htab, otherwise NULL value of
886- * htab indicates movement of all the tuples that belong to new bucket.
887- *
888- * Caller needs to lock and unlock the old and new primary buckets.
889- */
890- static void
891- _hash_splitbucket_guts (Relation rel ,
892- Buffer metabuf ,
893- Bucket obucket ,
894- Bucket nbucket ,
895- Buffer obuf ,
896- Buffer nbuf ,
897- HTAB * htab ,
898- uint32 maxbucket ,
899- uint32 highmask ,
900- uint32 lowmask )
901878{
902879 Buffer bucket_obuf ;
903880 Buffer bucket_nbuf ;
@@ -987,6 +964,7 @@ _hash_splitbucket_guts(Relation rel,
987964 {
988965 /* write out nbuf and drop lock, but keep pin */
989966 MarkBufferDirty (nbuf );
967+ /* drop lock, but keep pin */
990968 LockBuffer (nbuf , BUFFER_LOCK_UNLOCK );
991969 /* chain to a new overflow page */
992970 nbuf = _hash_addovflpage (rel , metabuf , nbuf , (nbuf == bucket_nbuf ) ? true : false);
@@ -1025,7 +1003,14 @@ _hash_splitbucket_guts(Relation rel,
10251003
10261004 /* Exit loop if no more overflow pages in old bucket */
10271005 if (!BlockNumberIsValid (oblkno ))
1006+ {
1007+ MarkBufferDirty (nbuf );
1008+ if (nbuf == bucket_nbuf )
1009+ LockBuffer (nbuf , BUFFER_LOCK_UNLOCK );
1010+ else
1011+ _hash_relbuf (rel , nbuf );
10281012 break ;
1013+ }
10291014
10301015 /* Else, advance to next old page */
10311016 obuf = _hash_getbuf (rel , oblkno , HASH_READ , LH_OVERFLOW_PAGE );
@@ -1041,17 +1026,6 @@ _hash_splitbucket_guts(Relation rel,
10411026 * To avoid deadlocks due to locking order of buckets, first lock the old
10421027 * bucket and then the new bucket.
10431028 */
1044- if (nbuf == bucket_nbuf )
1045- {
1046- MarkBufferDirty (bucket_nbuf );
1047- LockBuffer (bucket_nbuf , BUFFER_LOCK_UNLOCK );
1048- }
1049- else
1050- {
1051- MarkBufferDirty (nbuf );
1052- _hash_relbuf (rel , nbuf );
1053- }
1054-
10551029 LockBuffer (bucket_obuf , BUFFER_LOCK_EXCLUSIVE );
10561030 opage = BufferGetPage (bucket_obuf );
10571031 oopaque = (HashPageOpaque ) PageGetSpecialPointer (opage );
@@ -1192,9 +1166,9 @@ _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf, Bucket obucket,
11921166 npageopaque = (HashPageOpaque ) PageGetSpecialPointer (npage );
11931167 nbucket = npageopaque -> hasho_bucket ;
11941168
1195- _hash_splitbucket_guts (rel , metabuf , obucket ,
1196- nbucket , obuf , bucket_nbuf , tidhtab ,
1197- maxbucket , highmask , lowmask );
1169+ _hash_splitbucket (rel , metabuf , obucket ,
1170+ nbucket , obuf , bucket_nbuf , tidhtab ,
1171+ maxbucket , highmask , lowmask );
11981172
11991173 _hash_relbuf (rel , bucket_nbuf );
12001174 LockBuffer (obuf , BUFFER_LOCK_UNLOCK );
0 commit comments