@@ -190,7 +190,9 @@ static void _gin_parallel_scan_and_build(GinBuildState *buildstate,
190190 Relation heap , Relation index ,
191191 int sortmem , bool progress );
192192
193- static Datum _gin_parse_tuple (GinTuple * a , ItemPointerData * * items );
193+ static ItemPointer _gin_parse_tuple_items (GinTuple * a );
194+ static Datum _gin_parse_tuple_key (GinTuple * a );
195+
194196static GinTuple * _gin_build_tuple (OffsetNumber attrnum , unsigned char category ,
195197 Datum key , int16 typlen , bool typbyval ,
196198 ItemPointerData * items , uint32 nitems ,
@@ -1365,7 +1367,8 @@ GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
13651367
13661368 AssertCheckGinBuffer (buffer );
13671369
1368- key = _gin_parse_tuple (tup , & items );
1370+ key = _gin_parse_tuple_key (tup );
1371+ items = _gin_parse_tuple_items (tup );
13691372
13701373 /* if the buffer is empty, set the fields (and copy the key) */
13711374 if (GinBufferIsEmpty (buffer ))
@@ -1401,6 +1404,9 @@ GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
14011404
14021405 AssertCheckItemPointers (buffer );
14031406 }
1407+
1408+ /* free the decompressed TID list */
1409+ pfree (items );
14041410}
14051411
14061412/*
@@ -1955,6 +1961,15 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
19551961 table_close (heapRel , heapLockmode );
19561962}
19571963
1964+ /*
1965+ * List entry tracking one compressed TID list segment while building a GIN tuple.
1966+ */
1967+ typedef struct
1968+ {
1969+ dlist_node node ; /* links this entry into the dlist of segments */
1970+ GinPostingList * seg ; /* compressed segment from ginCompressPostingList */
1971+ } GinSegmentInfo ;
1972+
19581973/*
19591974 * _gin_build_tuple
19601975 * Serialize the state for an index key into a tuple for tuplesort.
@@ -1967,6 +1982,11 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
19671982 * like endianness etc. We could make it a little bit smaller, but it's not
19681983 * worth it - it's a tiny fraction of the data, and we need to MAXALIGN the
19691984 * start of the TID list anyway. So we wouldn't save anything.
1985+ *
1986+ * The TID list is serialized as compressed - it's highly compressible, and
1987+ * we already have ginCompressPostingList for this purpose. The list may be
1988+ * pretty long, so we compress it into multiple segments and then copy all
1989+ * of that into the GIN tuple.
19701990 */
19711991static GinTuple *
19721992_gin_build_tuple (OffsetNumber attrnum , unsigned char category ,
@@ -1980,6 +2000,11 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
19802000 Size tuplen ;
19812001 int keylen ;
19822002
2003+ dlist_mutable_iter iter ;
2004+ dlist_head segments ;
2005+ int ncompressed ;
2006+ Size compresslen ;
2007+
19832008 /*
19842009 * Calculate how long is the key value. Only keys with GIN_CAT_NORM_KEY
19852010 * have actual non-empty key. We include varlena headers and \0 bytes for
@@ -2006,12 +2031,34 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
20062031 else
20072032 elog (ERROR , "unexpected typlen value (%d)" , typlen );
20082033
2034+ /* compress the item pointers */
2035+ ncompressed = 0 ;
2036+ compresslen = 0 ;
2037+ dlist_init (& segments );
2038+
2039+ /* generate compressed segments of TID list chunks */
2040+ while (ncompressed < nitems )
2041+ {
2042+ int cnt ;
2043+ GinSegmentInfo * seginfo = palloc (sizeof (GinSegmentInfo ));
2044+
2045+ seginfo -> seg = ginCompressPostingList (& items [ncompressed ],
2046+ (nitems - ncompressed ),
2047+ UINT16_MAX ,
2048+ & cnt );
2049+
2050+ ncompressed += cnt ;
2051+ compresslen += SizeOfGinPostingList (seginfo -> seg );
2052+
2053+ dlist_push_tail (& segments , & seginfo -> node );
2054+ }
2055+
20092056 /*
20102057 * Determine GIN tuple length with all the data included. Be careful about
2011- * alignment, to allow direct access to item pointers.
2058+ * alignment, to allow direct access to compressed segments (those require
2059+ * only SHORTALIGN).
20122060 */
2013- tuplen = SHORTALIGN (offsetof(GinTuple , data ) + keylen ) +
2014- (sizeof (ItemPointerData ) * nitems );
2061+ tuplen = SHORTALIGN (offsetof(GinTuple , data ) + keylen ) + compresslen ;
20152062
20162063 * len = tuplen ;
20172064
@@ -2061,37 +2108,40 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
20612108 /* finally, copy the TIDs into the array */
20622109 ptr = (char * ) tuple + SHORTALIGN (offsetof(GinTuple , data ) + keylen );
20632110
2064- memcpy (ptr , items , sizeof (ItemPointerData ) * nitems );
2111+ /* copy in the compressed data, and free the segments */
2112+ dlist_foreach_modify (iter , & segments )
2113+ {
2114+ GinSegmentInfo * seginfo = dlist_container (GinSegmentInfo , node , iter .cur );
2115+
2116+ memcpy (ptr , seginfo -> seg , SizeOfGinPostingList (seginfo -> seg ));
2117+
2118+ ptr += SizeOfGinPostingList (seginfo -> seg );
2119+
2120+ dlist_delete (& seginfo -> node );
2121+
2122+ pfree (seginfo -> seg );
2123+ pfree (seginfo );
2124+ }
20652125
20662126 return tuple ;
20672127}
20682128
20692129/*
2070- * _gin_parse_tuple
2071- * Deserialize the tuple from the tuplestore representation .
2130+ * _gin_parse_tuple_key
2131+ * Return a Datum representing the key stored in the tuple .
20722132 *
2073- * Most of the fields are actually directly accessible, the only thing that
2133+ * Most of the tuple fields are directly accessible; the only things that
20742134 * need more care are the key and the TID list.
20752135 *
20762136 * For the key, this returns a regular Datum representing it. It's either the
20772137 * actual key value, or a pointer to the beginning of the data array (which is
20782138 * where the data was copied by _gin_build_tuple).
2079- *
2080- * The pointer to the TID list is returned through 'items' (which is simply
2081- * a pointer to the data array).
20822139 */
20832140static Datum
2084- _gin_parse_tuple (GinTuple * a , ItemPointerData * * items )
2141+ _gin_parse_tuple_key (GinTuple * a )
20852142{
20862143 Datum key ;
20872144
2088- if (items )
2089- {
2090- char * ptr = (char * ) a + SHORTALIGN (offsetof(GinTuple , data ) + a -> keylen );
2091-
2092- * items = (ItemPointerData * ) ptr ;
2093- }
2094-
20952145 if (a -> category != GIN_CAT_NORM_KEY )
20962146 return (Datum ) 0 ;
20972147
@@ -2104,6 +2154,28 @@ _gin_parse_tuple(GinTuple *a, ItemPointerData **items)
21042154 return PointerGetDatum (a -> data );
21052155}
21062156
2157+ /*
2158+ * _gin_parse_tuple_items
2159+ * Decode the compressed TID list stored after the key and return it as a palloc'd array (the caller frees it).
2160+ */
2161+ static ItemPointer
2162+ _gin_parse_tuple_items (GinTuple * a )
2163+ {
2164+ int len ; /* bytes in the tuple past the aligned key area */
2165+ char * ptr ; /* first byte past the aligned key area */
2166+ int ndecoded ; /* number of TIDs reported by the decoder */
2167+ ItemPointer items ;
2168+
2169+ len = a -> tuplen - SHORTALIGN (offsetof(GinTuple , data ) + a -> keylen );
2170+ ptr = (char * ) a + SHORTALIGN (offsetof(GinTuple , data ) + a -> keylen );
2171+
2172+ items = ginPostingListDecodeAllSegments ((GinPostingList * ) ptr , len , & ndecoded );
2173+
2174+ Assert (ndecoded == a -> nitems );
2175+
2176+ return (ItemPointer ) items ;
2177+ }
2178+
21072179/*
21082180 * _gin_compare_tuples
21092181 * Compare GIN tuples, used by tuplesort during parallel index build.
@@ -2139,8 +2211,8 @@ _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
21392211
21402212 if (a -> category == GIN_CAT_NORM_KEY )
21412213 {
2142- keya = _gin_parse_tuple ( a , NULL );
2143- keyb = _gin_parse_tuple ( b , NULL );
2214+ keya = _gin_parse_tuple_key ( a );
2215+ keyb = _gin_parse_tuple_key ( b );
21442216
21452217 r = ApplySortComparator (keya , false,
21462218 keyb , false,
0 commit comments