2626
2727/* non-export function prototypes */
2828static void gistfixsplit (GISTInsertState * state , GISTSTATE * giststate );
29+ static bool gistinserttuple (GISTInsertState * state , GISTInsertStack * stack ,
30+ GISTSTATE * giststate , IndexTuple tuple , OffsetNumber oldoffnum );
2931static bool gistinserttuples (GISTInsertState * state , GISTInsertStack * stack ,
3032 GISTSTATE * giststate ,
3133 IndexTuple * tuples , int ntup , OffsetNumber oldoffnum ,
32- Buffer leftchild );
34+ Buffer leftchild , Buffer rightchild ,
35+ bool unlockbuf , bool unlockleftchild );
3336static void gistfinishsplit (GISTInsertState * state , GISTInsertStack * stack ,
34- GISTSTATE * giststate , List * splitinfo );
37+ GISTSTATE * giststate , List * splitinfo , bool releasebuf );
3538
3639
3740#define ROTATEDIST (d ) do { \
@@ -609,15 +612,15 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
609612 /*
610613 * Update the tuple.
611614 *
612- * We still hold the lock after gistinserttuples (), but it
615+ * We still hold the lock after gistinserttuple (), but it
613616 * might have to split the page to make the updated tuple fit.
614617 * In that case the updated tuple might migrate to the other
615618 * half of the split, so we have to go back to the parent and
616619 * descend back to the half that's a better fit for the new
617620 * tuple.
618621 */
619- if (gistinserttuples (& state , stack , giststate , & newtup , 1 ,
620- downlinkoffnum , InvalidBuffer ))
622+ if (gistinserttuple (& state , stack , giststate , newtup ,
623+ downlinkoffnum ))
621624 {
622625 /*
623626 * If this was a root split, the root page continues to be
@@ -703,8 +706,8 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
703706
704707 /* now state.stack->(page, buffer and blkno) points to leaf page */
705708
706- gistinserttuples (& state , stack , giststate , & itup , 1 ,
707- InvalidOffsetNumber , InvalidBuffer );
709+ gistinserttuple (& state , stack , giststate , itup ,
710+ InvalidOffsetNumber );
708711 LockBuffer (stack -> buffer , GIST_UNLOCK );
709712
710713 /* Release any pins we might still hold before exiting */
@@ -1028,51 +1031,107 @@ gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
10281031 }
10291032
10301033 /* Insert the downlinks */
1031- gistfinishsplit (state , stack , giststate , splitinfo );
1034+ gistfinishsplit (state , stack , giststate , splitinfo , false );
10321035}
10331036
10341037/*
1035- * Insert tuples to stack->buffer. If 'oldoffnum' is valid, the new tuples
1036- * replace an old tuple at oldoffnum. The caller must hold an exclusive lock
1037- * on the page.
1038+ * Insert or replace a tuple in stack->buffer. If 'oldoffnum' is valid, the
1039+ * tuple at ' oldoffnum' is replaced, otherwise the tuple is inserted as new.
1040+ * 'stack' represents the path from the root to the page being updated .
10381041 *
1039- * If leftchild is valid, we're inserting/updating the downlink for the
1040- * page to the right of leftchild. We clear the F_FOLLOW_RIGHT flag and
1041- * update NSN on leftchild, atomically with the insertion of the downlink.
1042+ * The caller must hold an exclusive lock on stack->buffer. The lock is still
1043+ * held on return, but the page might not contain the inserted tuple if the
1044+ * page was split. The function returns true if the page was split, false
1045+ * otherwise.
1046+ */
1047+ static bool
1048+ gistinserttuple (GISTInsertState * state , GISTInsertStack * stack ,
1049+ GISTSTATE * giststate , IndexTuple tuple , OffsetNumber oldoffnum )
1050+ {
1051+ return gistinserttuples (state , stack , giststate , & tuple , 1 , oldoffnum ,
1052+ InvalidBuffer , InvalidBuffer , false, false);
1053+ }
1054+
1055+ /* ----------------
1056+ * An extended workhorse version of gistinserttuple(). This version allows
1057+ * inserting multiple tuples, or replacing a single tuple with multiple tuples.
1058+ * This is used to recursively update the downlinks in the parent when a page
1059+ * is split.
10421060 *
1043- * Returns 'true' if the page had to be split. On return, we will continue
1044- * to hold an exclusive lock on state->stack->buffer, but if we had to split
1045- * the page, it might not contain the tuple we just inserted/updated.
1061+ * If leftchild and rightchild are valid, we're inserting/replacing the
1062+ * downlink for rightchild, and leftchild is its left sibling. We clear the
1063+ * F_FOLLOW_RIGHT flag and update NSN on leftchild, atomically with the
1064+ * insertion of the downlink.
1065+ *
1066+ * To avoid holding locks for longer than necessary, when recursing up the
1067+ * tree to update the parents, the locking is a bit peculiar here. On entry,
1068+ * the caller must hold an exclusive lock on stack->buffer, as well as
1069+ * leftchild and rightchild if given. On return:
1070+ *
1071+ * - Lock on stack->buffer is released, if 'unlockbuf' is true. The page is
1072+ * always kept pinned, however.
1073+ * - Lock on 'leftchild' is released, if 'unlockleftchild' is true. The page
1074+ * is kept pinned.
1075+ * - Lock and pin on 'rightchild' are always released.
1076+ *
1077+ * Returns 'true' if the page had to be split. Note that if the page had
1078+ * be split, the inserted/updated might've been inserted to a right sibling
1079+ * of stack->buffer instead of stack->buffer itself.
10461080 */
10471081static bool
10481082gistinserttuples (GISTInsertState * state , GISTInsertStack * stack ,
10491083 GISTSTATE * giststate ,
10501084 IndexTuple * tuples , int ntup , OffsetNumber oldoffnum ,
1051- Buffer leftchild )
1085+ Buffer leftchild , Buffer rightchild ,
1086+ bool unlockbuf , bool unlockleftchild )
10521087{
10531088 List * splitinfo ;
10541089 bool is_split ;
10551090
1091+ /* Insert the tuple(s) to the page, splitting the page if necessary */
10561092 is_split = gistplacetopage (state -> r , state -> freespace , giststate ,
10571093 stack -> buffer ,
10581094 tuples , ntup , oldoffnum ,
10591095 leftchild ,
10601096 & splitinfo ,
10611097 true);
1098+
1099+ /*
1100+ * Before recursing up in case the page was split, release locks on the
1101+ * child pages. We don't need to keep them locked when updating the
1102+ * parent.
1103+ */
1104+ if (BufferIsValid (rightchild ))
1105+ UnlockReleaseBuffer (rightchild );
1106+ if (BufferIsValid (leftchild ) && unlockleftchild )
1107+ LockBuffer (leftchild , GIST_UNLOCK );
1108+
1109+ /*
1110+ * If we had to split, insert/update the downlinks in the parent. If the
1111+ * caller requested us to release the lock on stack->buffer, tell
1112+ * gistfinishsplit() to do that as soon as it's safe to do so. If we
1113+ * didn't have to split, release it ourselves.
1114+ */
10621115 if (splitinfo )
1063- gistfinishsplit (state , stack , giststate , splitinfo );
1116+ gistfinishsplit (state , stack , giststate , splitinfo , unlockbuf );
1117+ else if (unlockbuf )
1118+ LockBuffer (stack -> buffer , GIST_UNLOCK );
10641119
10651120 return is_split ;
10661121}
10671122
10681123/*
1069- * Finish an incomplete split by inserting/updating the downlinks in
1070- * parent page. 'splitinfo' contains all the child pages, exclusively-locked,
1071- * involved in the split, from left-to-right.
1124+ * Finish an incomplete split by inserting/updating the downlinks in parent
1125+ * page. 'splitinfo' contains all the child pages involved in the split,
1126+ * from left-to-right.
1127+ *
1128+ * On entry, the caller must hold a lock on stack->buffer and all the child
1129+ * pages in 'splitinfo'. If 'unlockbuf' is true, the lock on stack->buffer is
1130+ * released on return. The child pages are always unlocked and unpinned.
10721131 */
10731132static void
10741133gistfinishsplit (GISTInsertState * state , GISTInsertStack * stack ,
1075- GISTSTATE * giststate , List * splitinfo )
1134+ GISTSTATE * giststate , List * splitinfo , bool unlockbuf )
10761135{
10771136 ListCell * lc ;
10781137 List * reversed ;
@@ -1101,6 +1160,10 @@ gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
11011160 LockBuffer (stack -> parent -> buffer , GIST_EXCLUSIVE );
11021161 gistFindCorrectParent (state -> r , stack );
11031162
1163+ /*
1164+ * insert downlinks for the siblings from right to left, until there are
1165+ * only two siblings left.
1166+ */
11041167 while (list_length (reversed ) > 2 )
11051168 {
11061169 right = (GISTPageSplitInfo * ) linitial (reversed );
@@ -1109,15 +1172,15 @@ gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
11091172 if (gistinserttuples (state , stack -> parent , giststate ,
11101173 & right -> downlink , 1 ,
11111174 InvalidOffsetNumber ,
1112- left -> buf ))
1175+ left -> buf , right -> buf , false, false ))
11131176 {
11141177 /*
11151178 * If the parent page was split, need to relocate the original
11161179 * parent pointer.
11171180 */
11181181 gistFindCorrectParent (state -> r , stack );
11191182 }
1120- UnlockReleaseBuffer ( right -> buf );
1183+ /* gistinserttuples() released the lock on right->buf. */
11211184 reversed = list_delete_first (reversed );
11221185 }
11231186
@@ -1134,9 +1197,10 @@ gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
11341197 gistinserttuples (state , stack -> parent , giststate ,
11351198 tuples , 2 ,
11361199 stack -> downlinkoffnum ,
1137- left -> buf );
1138- LockBuffer (stack -> parent -> buffer , GIST_UNLOCK );
1139- UnlockReleaseBuffer (right -> buf );
1200+ left -> buf , right -> buf ,
1201+ true, /* Unlock parent */
1202+ unlockbuf /* Unlock stack->buffer if caller wants that */
1203+ );
11401204 Assert (left -> buf == stack -> buffer );
11411205}
11421206
0 commit comments