@@ -954,18 +954,38 @@ _bt_delitems_delete(Relation rel, Buffer buf,
954954 END_CRIT_SECTION ();
955955}
956956
957+ /*
958+ * Returns true if the page in block "blk" of "rel" has the half-dead flag set.
959+ */
960+ static bool
961+ _bt_is_page_halfdead (Relation rel , BlockNumber blk )
962+ {
963+ Buffer buf ;
964+ Page page ;
965+ BTPageOpaque opaque ;
966+ bool result ;
967+
968+ buf = _bt_getbuf (rel , blk , BT_READ ); /* BT_READ suffices: the page is only inspected */
969+ page = BufferGetPage (buf );
970+ opaque = (BTPageOpaque ) PageGetSpecialPointer (page ); /* the flag is tested on the page's special area */
971+
972+ result = P_ISHALFDEAD (opaque ); /* save the answer before the buffer is handed back */
973+ _bt_relbuf (rel , buf ); /* done with the page */
974+
975+ return result ;
976+ }
977+
957978/*
958979 * Subroutine to find the parent of the branch we're deleting. This climbs
959980 * up the tree until it finds a page with more than one child, i.e. a page
960981 * that will not be totally emptied by the deletion. The chain of pages below
961- * it, with one downlink each, will be part of the branch that we need to
962- * delete.
982+ * it, with one downlink each, will form the branch that we need to delete.
963983 *
964984 * If we cannot remove the downlink from the parent, because it's the
965985 * rightmost entry, returns false. On success, *topparent and *topoff are set
966986 * to the buffer holding the parent, and the offset of the downlink in it.
967987 * *topparent is write-locked, the caller is responsible for releasing it when
968- * done. *target is set to the topmost page in the branch to-be-deleted, ie.
988+ * done. *target is set to the topmost page in the branch to-be-deleted, i.e.
969989 * the page whose downlink *topparent / *topoff point to, and *rightsib to its
970990 * right sibling.
971991 *
@@ -994,7 +1014,10 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
9941014 BTPageOpaque opaque ;
9951015 BlockNumber leftsib ;
9961016
997- /* Locate the parent's downlink (updating the stack entry if needed) */
1017+ /*
1018+ * Locate the downlink of "child" in the parent (updating the stack entry
1019+ * if needed)
1020+ */
9981021 ItemPointerSet (& (stack -> bts_btentry .t_tid ), child , P_HIKEY );
9991022 pbuf = _bt_getstackbuf (rel , stack , BT_WRITE );
10001023 if (pbuf == InvalidBuffer )
@@ -1066,6 +1089,17 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
10661089 _bt_relbuf (rel , lbuf );
10671090 }
10681091
1092+ /*
1093+ * Perform the same check on this internal level that
1094+ * _bt_mark_page_halfdead performed on the leaf level.
1095+ */
1096+ if (_bt_is_page_halfdead (rel , * rightsib ))
1097+ {
1098+ elog (DEBUG1 , "could not delete page %u because its right sibling %u is half-dead" ,
1099+ parent , * rightsib );
1100+ return false;
1101+ }
1102+
10691103 return _bt_lock_branch_parent (rel , parent , stack -> bts_parent ,
10701104 topparent , topoff , target , rightsib );
10711105 }
@@ -1234,6 +1268,13 @@ _bt_pagedel(Relation rel, Buffer buf)
12341268 lbuf = _bt_getbuf (rel , leftsib , BT_READ );
12351269 lpage = BufferGetPage (lbuf );
12361270 lopaque = (BTPageOpaque ) PageGetSpecialPointer (lpage );
1271+ /*
1272+ * If the left sibling is split again by another backend,
1273+ * after we released the lock, we know that the first
1274+ * split must have finished, because we don't allow an
1275+ * incompletely-split page to be split again. So we don't
1276+ * need to walk right here.
1277+ */
12371278 if (lopaque -> btpo_next == BufferGetBlockNumber (buf ) &&
12381279 P_INCOMPLETE_SPLIT (lopaque ))
12391280 {
@@ -1338,6 +1379,22 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
13381379 leafblkno = BufferGetBlockNumber (leafbuf );
13391380 leafrightsib = opaque -> btpo_next ;
13401381
1382+ /*
1383+ * Before attempting to lock the parent page, check that the right
1384+ * sibling is not in half-dead state. A half-dead right sibling would
1385+ * have no downlink in the parent, which would be highly confusing later
1386+ * when we delete the downlink that follows the current page's downlink.
1387+ * (I believe the deletion would work correctly, but it would fail the
1388+ * cross-check we make that the following downlink points to the right
1390+ * sibling of the deleted page.)
1390+ */
1391+ if (_bt_is_page_halfdead (rel , leafrightsib ))
1392+ {
1393+ elog (DEBUG1 , "could not delete page %u because its right sibling %u is half-dead" ,
1394+ leafblkno , leafrightsib );
1395+ return false;
1396+ }
1397+
13411398 /*
13421399 * We cannot delete a page that is the rightmost child of its immediate
13431400 * parent, unless it is the only child --- in which case the parent has to
0 commit comments