@@ -162,23 +162,16 @@ findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec,
162162 * Remove tuples that are marked don't-cares from the tuple index array a[]
163163 * of length *len. This is applied separately to the spl_left and spl_right
164164 * arrays.
165- *
166- * Corner case: we do not wish to reduce the index array to zero length.
167- * (If we did, then the union key for this side would be null, and having just
168- * one of spl_ldatum_exists and spl_rdatum_exists be TRUE might confuse
169- * user-defined PickSplit methods.) To avoid that, we'll forcibly redefine
170- * one tuple as non-don't-care if necessary. Hence, we must be able to adjust
171- * caller's NumDontCare count.
172165 */
173166static void
174- removeDontCares (OffsetNumber * a , int * len , bool * dontcare , int * NumDontCare )
167+ removeDontCares (OffsetNumber * a , int * len , const bool * dontcare )
175168{
176169 int origlen ,
177- curlen ,
170+ newlen ,
178171 i ;
179172 OffsetNumber * curwpos ;
180173
181- origlen = curlen = * len ;
174+ origlen = newlen = * len ;
182175 curwpos = a ;
183176 for (i = 0 ; i < origlen ; i ++ )
184177 {
@@ -190,18 +183,11 @@ removeDontCares(OffsetNumber *a, int *len, bool *dontcare, int *NumDontCare)
190183 * curwpos = ai ;
191184 curwpos ++ ;
192185 }
193- else if (curlen == 1 )
194- {
195- /* corner case: don't let array become empty */
196- dontcare [ai ] = FALSE; /* mark item as non-dont-care */
197- * NumDontCare -= 1 ;
198- i -- ; /* reprocess item on next iteration */
199- }
200186 else
201- curlen -- ;
187+ newlen -- ;
202188 }
203189
204- * len = curlen ;
190+ * len = newlen ;
205191}
206192
207193/*
@@ -304,8 +290,14 @@ supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno,
304290 penalty2 ;
305291
306292 /*
307- * there is only one previously defined union, so we just choose swap
308- * or not by lowest penalty
293+ * There is only one previously defined union, so we just choose swap
294+ * or not by lowest penalty for that side. We can only get here if a
295+ * secondary split happened to have all NULLs in its column in the
296+ * tuples that the outer recursion level had assigned to one side.
297+ * (Note that the null checks in gistSplitByKey don't prevent the
298+ * case, because they'll only be checking tuples that were considered
299+ * don't-cares at the outer recursion level, not the tuples that went
300+ * into determining the passed-down left and right union keys.)
309301 */
310302 penalty1 = gistpenalty (giststate , attno , entry1 , false, & entrySL , false);
311303 penalty2 = gistpenalty (giststate , attno , entry1 , false, & entrySR , false);
@@ -405,13 +397,19 @@ genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC
405397 * two vectors.
406398 *
407399 * Returns FALSE if split is complete (there are no more index columns, or
408- * there is no need to consider them). Note that in this case the union
409- * keys for all columns must be computed here.
410- * Returns TRUE and v->spl_dontcare = NULL if left and right unions of attno
411- * column are the same, so we should split on next column instead.
400+ * there is no need to consider them because split is optimal already).
401+ *
402+ * Returns TRUE and v->spl_dontcare = NULL if the picksplit result is
403+ * degenerate (all tuples seem to be don't-cares), so we should just
404+ * disregard this column and split on the next column(s) instead.
405+ *
412406 * Returns TRUE and v->spl_dontcare != NULL if there are don't-care tuples
413407 * that could be relocated based on the next column(s). The don't-care
414408 * tuples have been removed from the split and must be reinserted by caller.
409+ * There is at least one non-don't-care tuple on each side of the split,
410+ * and union keys for all columns are updated to include just those tuples.
411+ *
412+ * A TRUE result implies there is at least one more index column.
415413 */
416414static bool
417415gistUserPicksplit (Relation r , GistEntryVector * entryvec , int attno , GistSplitVector * v ,
@@ -483,49 +481,57 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
483481
484482 /*
485483 * If index columns remain, then consider whether we can improve the split
486- * by using them. Even if we can't, we must compute union keys for those
487- * columns before we can return FALSE.
484+ * by using them.
488485 */
489486 v -> spl_dontcare = NULL ;
490487
491488 if (attno + 1 < giststate -> tupdesc -> natts )
492489 {
493490 int NumDontCare ;
494491
492+ /*
493+ * Make a quick check to see if left and right union keys are equal;
494+ * if so, the split is certainly degenerate, so tell caller to
495+ * re-split with the next column.
496+ */
495497 if (gistKeyIsEQ (giststate , attno , sv -> spl_ldatum , sv -> spl_rdatum ))
496- {
497- /*
498- * Left and right union keys are equal, so we can get better split
499- * by considering next column.
500- */
501498 return true;
502- }
503499
504500 /*
505- * Locate don't-care tuples, if any
501+ * Locate don't-care tuples, if any. If there are none, the split is
502+ * optimal, so just fall out and return false.
506503 */
507504 v -> spl_dontcare = (bool * ) palloc0 (sizeof (bool ) * (entryvec -> n + 1 ));
508505
509506 NumDontCare = findDontCares (r , giststate , entryvec -> vector , v , attno );
510507
511- if (NumDontCare == 0 )
508+ if (NumDontCare > 0 )
512509 {
513510 /*
514- * There are no don't-cares, so just compute the union keys for
515- * remaining columns and we're done.
511+ * Remove don't-cares from spl_left[] and spl_right[].
516512 */
517- gistunionsubkey (giststate , itup , v );
518- }
519- else
520- {
513+ removeDontCares (sv -> spl_left , & sv -> spl_nleft , v -> spl_dontcare );
514+ removeDontCares (sv -> spl_right , & sv -> spl_nright , v -> spl_dontcare );
515+
521516 /*
522- * Remove don't-cares from spl_left[] and spl_right[]. NOTE: this
523- * could reduce NumDontCare to zero.
517+ * If all tuples on either side were don't-cares, the split is
518+ * degenerate, and we're best off to ignore it and split on the
519+ * next column. (We used to try to press on with a secondary
520+ * split by forcing a random tuple on each side to be treated as
521+ * non-don't-care, but it seems unlikely that that technique
522+ * really gives a better result. Note that we don't want to try a
523+ * secondary split with empty left or right primary split sides,
524+ * because then there is no union key on that side for the
525+ * PickSplit function to try to expand, so it can have no good
526+ * figure of merit for what it's doing. Also note that this check
527+ * ensures we can't produce a bogus one-side-only split in the
528+ * NumDontCare == 1 special case below.)
524529 */
525- removeDontCares (sv -> spl_left , & sv -> spl_nleft ,
526- v -> spl_dontcare , & NumDontCare );
527- removeDontCares (sv -> spl_right , & sv -> spl_nright ,
528- v -> spl_dontcare , & NumDontCare );
530+ if (sv -> spl_nleft == 0 || sv -> spl_nright == 0 )
531+ {
532+ v -> spl_dontcare = NULL ;
533+ return true;
534+ }
529535
530536 /*
531537 * Recompute union keys, considering only non-don't-care tuples.
@@ -541,7 +547,9 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
541547 /*
542548 * If there's only one don't-care tuple then we can't do a
543549 * PickSplit on it, so just choose whether to send it left or
544- * right by comparing penalties.
550+ * right by comparing penalties. We needed the
551+ * gistunionsubkey step anyway so that we have appropriate
552+ * union keys for figuring the penalties.
545553 */
546554 OffsetNumber toMove ;
547555
@@ -556,13 +564,14 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
556564 /* ... and assign it to cheaper side */
557565 placeOne (r , giststate , v , itup [toMove - 1 ], toMove , attno + 1 );
558566
559- /* recompute the union keys including this tuple */
560- v -> spl_dontcare = NULL ;
561- gistunionsubkey (giststate , itup , v );
567+ /*
568+ * At this point the union keys are wrong, but we don't care
569+ * because we're done splitting. The outermost recursion
570+ * level of gistSplitByKey will fix things before returning.
571+ */
562572 }
563- else if ( NumDontCare > 1 )
573+ else
564574 return true;
565- /* else NumDontCare is now zero; handle same as above */
566575 }
567576 }
568577
@@ -648,7 +657,7 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
648657 */
649658 v -> spl_risnull [attno ] = v -> spl_lisnull [attno ] = TRUE;
650659
651- if (attno + 1 < r -> rd_att -> natts )
660+ if (attno + 1 < giststate -> tupdesc -> natts )
652661 gistSplitByKey (r , page , itup , len , giststate , v , attno + 1 );
653662 else
654663 gistSplitHalf (& v -> splitVector , len );
@@ -673,28 +682,31 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
673682 else
674683 v -> splitVector .spl_left [v -> splitVector .spl_nleft ++ ] = i ;
675684
676- /* Must compute union keys for this and any following columns */
677- v -> spl_dontcare = NULL ;
678- gistunionsubkey (giststate , itup , v );
685+ /* Compute union keys, unless outer recursion level will handle it */
686+ if (attno == 0 && giststate -> tupdesc -> natts == 1 )
687+ {
688+ v -> spl_dontcare = NULL ;
689+ gistunionsubkey (giststate , itup , v );
690+ }
679691 }
680692 else
681693 {
682694 /*
683- * all keys are not-null, so apply user-defined PickSplit method
695+ * All keys are not-null, so apply user-defined PickSplit method
684696 */
685697 if (gistUserPicksplit (r , entryvec , attno , v , itup , len , giststate ))
686698 {
687699 /*
688700 * Splitting on attno column is not optimal, so consider
689701 * redistributing don't-care tuples according to the next column
690702 */
691- Assert (attno + 1 < r -> rd_att -> natts );
703+ Assert (attno + 1 < giststate -> tupdesc -> natts );
692704
693705 if (v -> spl_dontcare == NULL )
694706 {
695707 /*
696- * Simple case: left and right keys for attno column are
697- * equal, so just split according to the next column.
708+ * This split was actually degenerate, so ignore it altogether
709+ * and just split according to the next column.
698710 */
699711 gistSplitByKey (r , page , itup , len , giststate , v , attno + 1 );
700712 }
@@ -741,10 +753,27 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
741753 backupSplit .spl_right [backupSplit .spl_nright ++ ] = map [v -> splitVector .spl_right [i ] - 1 ];
742754
743755 v -> splitVector = backupSplit ;
744-
745- /* recompute left and right union datums */
746- gistunionsubkey (giststate , itup , v );
747756 }
748757 }
749758 }
759+
760+ /*
761+ * If we're handling a multicolumn index, at the end of the recursion
762+ * recompute the left and right union datums for all index columns. This
763+ * makes sure we hand back correct union datums in all corner cases,
764+ * including when we haven't processed all columns to start with, or when
765+ * a secondary split moved "don't care" tuples from one side to the other
766+ * (we really shouldn't assume that that didn't change the union datums).
767+ *
768+ * Note: when we're in an internal recursion (attno > 0), we do not worry
769+ * about whether the union datums we return with are sensible, since
770+ * calling levels won't care. Also, in a single-column index, we expect
771+ * that PickSplit (or the special cases above) produced correct union
772+ * datums.
773+ */
774+ if (attno == 0 && giststate -> tupdesc -> natts > 1 )
775+ {
776+ v -> spl_dontcare = NULL ;
777+ gistunionsubkey (giststate , itup , v );
778+ }
750779}
0 commit comments