@@ -53,9 +53,13 @@ typedef struct BrinBuildState
5353 Buffer bs_currentInsertBuf ;
5454 BlockNumber bs_pagesPerRange ;
5555 BlockNumber bs_currRangeStart ;
56+ BlockNumber bs_maxRangeStart ;
5657 BrinRevmap * bs_rmAccess ;
5758 BrinDesc * bs_bdesc ;
5859 BrinMemTuple * bs_dtuple ;
60+ BrinTuple * bs_emptyTuple ;
61+ Size bs_emptyTupleLen ;
62+ MemoryContext bs_context ;
5963} BrinBuildState ;
6064
6165/*
@@ -82,7 +86,9 @@ typedef struct BrinOpaque
8286#define BRIN_ALL_BLOCKRANGES InvalidBlockNumber
8387
8488static BrinBuildState * initialize_brin_buildstate (Relation idxRel ,
85- BrinRevmap * revmap , BlockNumber pagesPerRange );
89+ BrinRevmap * revmap ,
90+ BlockNumber pagesPerRange ,
91+ BlockNumber tablePages );
8692static BrinInsertState * initialize_brin_insertstate (Relation idxRel , IndexInfo * indexInfo );
8793static void terminate_brin_buildstate (BrinBuildState * state );
8894static void brinsummarize (Relation index , Relation heapRel , BlockNumber pageRange ,
@@ -94,6 +100,8 @@ static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy);
94100static bool add_values_to_range (Relation idxRel , BrinDesc * bdesc ,
95101 BrinMemTuple * dtup , const Datum * values , const bool * nulls );
96102static bool check_null_keys (BrinValues * bval , ScanKey * nullkeys , int nnullkeys );
103+ static void brin_fill_empty_ranges (BrinBuildState * state ,
104+ BlockNumber prevRange , BlockNumber maxRange );
97105
98106/*
99107 * BRIN handler function: return IndexAmRoutine with access method parameters
@@ -933,7 +941,8 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
933941 * Initialize our state, including the deformed tuple state.
934942 */
935943 revmap = brinRevmapInitialize (index , & pagesPerRange );
936- state = initialize_brin_buildstate (index , revmap , pagesPerRange );
944+ state = initialize_brin_buildstate (index , revmap , pagesPerRange ,
945+ RelationGetNumberOfBlocks (heap ));
937946
938947 /*
939948 * Now scan the relation. No syncscan allowed here because we want the
@@ -945,6 +954,17 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
945954 /* process the final batch */
946955 form_and_insert_tuple (state );
947956
957+ /*
958+ * Backfill the final ranges with empty data.
959+ *
960+ * This saves us from doing what amounts to full table scans when the
961+ * index is used with a predicate like WHERE (nonnull_column IS NULL), or
962+ * other very selective predicates.
963+ */
964+ brin_fill_empty_ranges (state ,
965+ state -> bs_currRangeStart ,
966+ state -> bs_maxRangeStart );
967+
948968 /* release resources */
949969 idxtuples = state -> bs_numtuples ;
950970 brinRevmapTerminate (state -> bs_rmAccess );
@@ -1358,9 +1378,10 @@ brinGetStats(Relation index, BrinStatsData *stats)
13581378 */
13591379static BrinBuildState *
13601380initialize_brin_buildstate (Relation idxRel , BrinRevmap * revmap ,
1361- BlockNumber pagesPerRange )
1381+ BlockNumber pagesPerRange , BlockNumber tablePages )
13621382{
13631383 BrinBuildState * state ;
1384+ BlockNumber lastRange = 0 ;
13641385
13651386 state = palloc_object (BrinBuildState );
13661387
@@ -1373,6 +1394,22 @@ initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap,
13731394 state -> bs_bdesc = brin_build_desc (idxRel );
13741395 state -> bs_dtuple = brin_new_memtuple (state -> bs_bdesc );
13751396
1397+ /* Remember the memory context to use for an empty tuple, if needed. */
1398+ state -> bs_context = CurrentMemoryContext ;
1399+ state -> bs_emptyTuple = NULL ;
1400+ state -> bs_emptyTupleLen = 0 ;
1401+
1402+ /*
1403+ * Calculate the start of the last page range. Page numbers are 0-based,
1404+ * so to calculate the index we need to subtract one. The integer division
1405+ * gives us the index of the page range.
1406+ */
1407+ if (tablePages > 0 )
1408+ lastRange = ((tablePages - 1 ) / pagesPerRange ) * pagesPerRange ;
1409+
1410+ /* Now calculate the start of the next range. */
1411+ state -> bs_maxRangeStart = lastRange + state -> bs_pagesPerRange ;
1412+
13761413 return state ;
13771414}
13781415
@@ -1612,7 +1649,8 @@ brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
16121649 /* first time through */
16131650 Assert (!indexInfo );
16141651 state = initialize_brin_buildstate (index , revmap ,
1615- pagesPerRange );
1652+ pagesPerRange ,
1653+ InvalidBlockNumber );
16161654 indexInfo = BuildIndexInfo (index );
16171655 }
16181656 summarize_range (indexInfo , state , heapRel , startBlk , heapNumBlocks );
@@ -1982,3 +2020,78 @@ check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys)
19822020
19832021 return true;
19842022}
2023+
2024+ /*
2025+ * brin_build_empty_tuple
2026+ * Maybe initialize a BRIN tuple representing empty range.
2027+ *
2028+ * Returns a BRIN tuple representing an empty page range starting at the
2029+ * specified block number. The empty tuple is initialized only once, when it's
2030+ * needed for the first time, stored in the memory context bs_context to ensure
2031+ * proper life span, and reused on following calls. All empty tuples are
2032+ * exactly the same except for the bs_blkno field, which is set to the value
2033+ * in blkno parameter.
2034+ */
2035+ static void
2036+ brin_build_empty_tuple (BrinBuildState * state , BlockNumber blkno )
2037+ {
2038+ /* First time an empty tuple is requested? If yes, initialize it. */
2039+ if (state -> bs_emptyTuple == NULL )
2040+ {
2041+ MemoryContext oldcxt ;
2042+ BrinMemTuple * dtuple = brin_new_memtuple (state -> bs_bdesc );
2043+
2044+ /* Allocate the tuple in context for the whole index build. */
2045+ oldcxt = MemoryContextSwitchTo (state -> bs_context );
2046+
2047+ state -> bs_emptyTuple = brin_form_tuple (state -> bs_bdesc , blkno , dtuple ,
2048+ & state -> bs_emptyTupleLen );
2049+
2050+ MemoryContextSwitchTo (oldcxt );
2051+ }
2052+ else
2053+ {
2054+ /* If we already have an empty tuple, just update the block. */
2055+ state -> bs_emptyTuple -> bt_blkno = blkno ;
2056+ }
2057+ }
2058+
2059+ /*
2060+ * brin_fill_empty_ranges
2061+ * Add BRIN index tuples representing empty page ranges.
2062+ *
2063+ * prevRange/nextRange determine for which page ranges to add empty summaries.
2064+ * Both boundaries are exclusive, i.e. only ranges starting at blkno for which
2065+ * (prevRange < blkno < nextRange) will be added to the index.
2066+ *
2067+ * If prevRange is InvalidBlockNumber, this means there was no previous page
2068+ * range (i.e. the first empty range to add is for blkno=0).
2069+ *
2070+ * The empty tuple is built only once, and then reused for all future calls.
2071+ */
2072+ static void
2073+ brin_fill_empty_ranges (BrinBuildState * state ,
2074+ BlockNumber prevRange , BlockNumber nextRange )
2075+ {
2076+ BlockNumber blkno ;
2077+
2078+ /*
2079+ * If we already summarized some ranges, we need to start with the next
2080+ * one. Otherwise start from the first range of the table.
2081+ */
2082+ blkno = (prevRange == InvalidBlockNumber ) ? 0 : (prevRange + state -> bs_pagesPerRange );
2083+
2084+ /* Generate empty ranges until we hit the next non-empty range. */
2085+ while (blkno < nextRange )
2086+ {
2087+ /* Did we already build the empty tuple? If not, do it now. */
2088+ brin_build_empty_tuple (state , blkno );
2089+
2090+ brin_doinsert (state -> bs_irel , state -> bs_pagesPerRange , state -> bs_rmAccess ,
2091+ & state -> bs_currentInsertBuf ,
2092+ blkno , state -> bs_emptyTuple , state -> bs_emptyTupleLen );
2093+
2094+ /* try next page range */
2095+ blkno += state -> bs_pagesPerRange ;
2096+ }
2097+ }
0 commit comments