@@ -451,9 +451,9 @@ statext_mcv_load(Oid mvoid)
451451 *
452452 * The overall structure of the serialized representation looks like this:
453453 *
454- * +--------+----------------+---------------------+-------+
455- * | header | dimension info | deduplicated values | items |
456- * +--------+----------------+---------------------+-------+
454+ * +---------------+----------------+---------------------+-------+
455+ * | header fields | dimension info | deduplicated values | items |
456+ * +---------------+----------------+---------------------+-------+
457457 *
458458 * Where dimension info stores information about type of K-th attribute (e.g.
459459 * typlen, typbyval and length of deduplicated values). Deduplicated values
@@ -492,6 +492,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
492492
493493 /* serialized items (indexes into arrays, etc.) */
494494 bytea * output ;
495+ char * raw ;
495496 char * ptr ;
496497
497498 /* values per dimension (and number of non-NULL values) */
@@ -593,18 +594,26 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
593594 info [dim ].nbytes = 0 ;
594595 for (i = 0 ; i < info [dim ].nvalues ; i ++ )
595596 {
597+ Size len ;
598+
596599 values [dim ][i ] = PointerGetDatum (PG_DETOAST_DATUM (values [dim ][i ]));
597- info [dim ].nbytes += VARSIZE_ANY (values [dim ][i ]);
600+
601+ len = VARSIZE_ANY (values [dim ][i ]);
602+ info [dim ].nbytes += MAXALIGN (len );
598603 }
599604 }
600605 else if (info [dim ].typlen == -2 ) /* cstring */
601606 {
602607 info [dim ].nbytes = 0 ;
603608 for (i = 0 ; i < info [dim ].nvalues ; i ++ )
604609 {
610+ Size len ;
611+
605612 /* c-strings include terminator, so +1 byte */
606613 values [dim ][i ] = PointerGetDatum (PG_DETOAST_DATUM (values [dim ][i ]));
607- info [dim ].nbytes += strlen (DatumGetCString (values [dim ][i ])) + 1 ;
614+
615+ len = strlen (DatumGetCString (values [dim ][i ])) + 1 ;
616+ info [dim ].nbytes += MAXALIGN (len );
608617 }
609618 }
610619
@@ -617,28 +626,30 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
617626 * whole serialized MCV list (varlena header, MCV header, dimension info
618627 * for each attribute, deduplicated values and items).
619628 */
620- total_length = VARHDRSZ + offsetof(MCVList , items )
621- + (ndims * sizeof (DimensionInfo ))
622- + (mcvlist -> nitems * itemsize );
629+ total_length = offsetof(MCVList , items )
630+ + MAXALIGN (ndims * sizeof (DimensionInfo ));
623631
624632 /* add space for the arrays of deduplicated values */
625633 for (i = 0 ; i < ndims ; i ++ )
626- total_length += info [i ].nbytes ;
634+ total_length += MAXALIGN ( info [i ].nbytes ) ;
627635
628- /* allocate space for the whole serialized MCV list */
629- output = (bytea * ) palloc (total_length );
630- SET_VARSIZE (output , total_length );
636+ /* and finally the items (no additional alignment needed) */
637+ total_length += mcvlist -> nitems * itemsize ;
631638
632- /* 'ptr' points to the current position in the output buffer */
633- ptr = VARDATA (output );
639+ /*
640+ * Allocate space for the whole serialized MCV list (we'll skip bytes,
641+ * so we set them to zero to make the result more compressible).
642+ */
643+ raw = palloc0 (total_length );
644+ ptr = raw ;
634645
635646 /* copy the MCV list header */
636647 memcpy (ptr , mcvlist , offsetof(MCVList , items ));
637648 ptr += offsetof(MCVList , items );
638649
639650 /* store information about the attributes */
640651 memcpy (ptr , info , sizeof (DimensionInfo ) * ndims );
641- ptr += sizeof (DimensionInfo ) * ndims ;
652+ ptr += MAXALIGN ( sizeof (DimensionInfo ) * ndims ) ;
642653
643654 /* Copy the deduplicated values for all attributes to the output. */
644655 for (dim = 0 ; dim < ndims ; dim ++ )
@@ -670,6 +681,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
670681 }
671682 else if (info [dim ].typlen > 0 ) /* passed by reference */
672683 {
684+ /* no special alignment needed, treated as char array */
673685 memcpy (ptr , DatumGetPointer (value ), info [dim ].typlen );
674686 ptr += info [dim ].typlen ;
675687 }
@@ -678,14 +690,14 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
678690 int len = VARSIZE_ANY (value );
679691
680692 memcpy (ptr , DatumGetPointer (value ), len );
681- ptr += len ;
693+ ptr += MAXALIGN ( len ) ;
682694 }
683695 else if (info [dim ].typlen == -2 ) /* cstring */
684696 {
685697 Size len = strlen (DatumGetCString (value )) + 1 ; /* terminator */
686698
687699 memcpy (ptr , DatumGetCString (value ), len );
688- ptr += len ;
700+ ptr += MAXALIGN ( len ) ;
689701 }
690702
691703 /* no underflows or overflows */
@@ -694,6 +706,9 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
694706
695707 /* we should get exactly nbytes of data for this dimension */
696708 Assert ((ptr - start ) == info [dim ].nbytes );
709+
710+ /* make sure the pointer is aligned correctly after each dimension */
711+ ptr = raw + MAXALIGN (ptr - raw );
697712 }
698713
699714 /* Serialize the items, with uint16 indexes instead of the values. */
@@ -702,7 +717,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
702717 MCVItem * mcvitem = & mcvlist -> items [i ];
703718
704719 /* don't write beyond the allocated space */
705- Assert (ptr <= ( char * ) output + total_length - itemsize );
720+ Assert (ptr <= raw + total_length - itemsize );
706721
707722 /* reset the item (we only allocate it once and reuse it) */
708723 memset (item , 0 , itemsize );
@@ -741,12 +756,19 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
741756 }
742757
743758 /* at this point we expect to match the total_length exactly */
744- Assert ((ptr - ( char * ) output ) == total_length );
759+ Assert ((ptr - raw ) == total_length );
745760
746761 pfree (item );
747762 pfree (values );
748763 pfree (counts );
749764
765+ output = (bytea * ) palloc (VARHDRSZ + total_length );
766+ SET_VARSIZE (output , VARHDRSZ + total_length );
767+
768+ memcpy (VARDATA_ANY (output ), raw , total_length );
769+
770+ pfree (raw );
771+
750772 return output ;
751773}
752774
@@ -764,6 +786,7 @@ statext_mcv_deserialize(bytea *data)
764786 i ;
765787 Size expected_size ;
766788 MCVList * mcvlist ;
789+ char * raw ;
767790 char * ptr ;
768791
769792 int ndims ,
@@ -781,6 +804,7 @@ statext_mcv_deserialize(bytea *data)
781804 Size datalen ;
782805 char * dataptr ;
783806 char * valuesptr ;
807+ char * isnullptr ;
784808
785809 if (data == NULL )
786810 return NULL ;
@@ -797,7 +821,10 @@ statext_mcv_deserialize(bytea *data)
797821 mcvlist = (MCVList * ) palloc0 (offsetof(MCVList , items ));
798822
799823 /* copy the data part (without the varlena header) into a separate buffer, and point ptr at its start */
800- ptr = VARDATA_ANY (data );
824+ raw = palloc (VARSIZE_ANY_EXHDR (data ));
825+ ptr = raw ;
826+
827+ memcpy (raw , VARDATA_ANY (data ), VARSIZE_ANY_EXHDR (data ));
801828
802829 /* get the header and perform further sanity checks */
803830 memcpy (mcvlist , ptr , offsetof(MCVList , items ));
@@ -848,7 +875,7 @@ statext_mcv_deserialize(bytea *data)
848875
849876 /* Now it's safe to access the dimension info. */
850877 info = (DimensionInfo * ) ptr ;
851- ptr += ndims * sizeof (DimensionInfo );
878+ ptr += MAXALIGN ( ndims * sizeof (DimensionInfo ) );
852879
853880 /* account for the value arrays */
854881 for (dim = 0 ; dim < ndims ; dim ++ )
@@ -860,7 +887,7 @@ statext_mcv_deserialize(bytea *data)
860887 Assert (info [dim ].nvalues >= 0 );
861888 Assert (info [dim ].nbytes >= 0 );
862889
863- expected_size += info [dim ].nbytes ;
890+ expected_size += MAXALIGN ( info [dim ].nbytes ) ;
864891 }
865892
866893 /*
@@ -890,7 +917,7 @@ statext_mcv_deserialize(bytea *data)
890917
891918 /* space needed for a copy of data for by-ref types */
892919 if (!info [dim ].typbyval )
893- datalen += info [dim ].nbytes ;
920+ datalen += MAXALIGN ( info [dim ].nbytes ) ;
894921 }
895922
896923 /*
@@ -899,19 +926,25 @@ statext_mcv_deserialize(bytea *data)
899926 * original data - it may disappear while we're still using the MCV list,
900927 * e.g. due to catcache release. Only needed for by-ref types.
901928 */
902- mcvlen = offsetof(MCVList , items ) +
903- + (sizeof (MCVItem ) * nitems ) /* array of MCVItem */
904- + ((sizeof (Datum ) + sizeof (bool )) * ndims * nitems ) +
905- + datalen ; /* by-ref data */
929+ mcvlen = MAXALIGN (offsetof(MCVList , items ) + (sizeof (MCVItem ) * nitems ));
930+
931+ /* arrays of values and isnull flags for all MCV items */
932+ mcvlen += MAXALIGN (sizeof (Datum ) * ndims * nitems );
933+ mcvlen += MAXALIGN (sizeof (bool ) * ndims * nitems );
906934
935+ /* we don't quite need to align this, but it makes some asserts easier */
936+ mcvlen += MAXALIGN (datalen );
937+
938+ /* now resize the deserialized MCV list, and compute pointers to parts */
907939 mcvlist = repalloc (mcvlist , mcvlen );
908940
909- /* pointer to the beginning of values/isnull space */
910- valuesptr = (char * ) mcvlist + offsetof(MCVList , items )
911- + (sizeof (MCVItem ) * nitems );
941+ /* pointer to the beginning of values/isnull arrays */
942+ valuesptr = (char * ) mcvlist
943+ + MAXALIGN (offsetof(MCVList , items ) + (sizeof (MCVItem ) * nitems ));
944+
945+ isnullptr = valuesptr + (MAXALIGN (sizeof (Datum ) * ndims * nitems ));
912946
913- /* get pointer where to store the data */
914- dataptr = (char * ) mcvlist + (mcvlen - datalen );
947+ dataptr = isnullptr + (MAXALIGN (sizeof (bool ) * ndims * nitems ));
915948
916949 /*
917950 * Build mapping (index => value) for translating the serialized data into
@@ -963,11 +996,11 @@ statext_mcv_deserialize(bytea *data)
963996 Size len = VARSIZE_ANY (ptr );
964997
965998 memcpy (dataptr , ptr , len );
966- ptr += len ;
999+ ptr += MAXALIGN ( len ) ;
9671000
9681001 /* just point into the array */
9691002 map [dim ][i ] = PointerGetDatum (dataptr );
970- dataptr += len ;
1003+ dataptr += MAXALIGN ( len ) ;
9711004 }
9721005 }
9731006 else if (info [dim ].typlen == -2 )
@@ -978,11 +1011,11 @@ statext_mcv_deserialize(bytea *data)
9781011 Size len = (strlen (ptr ) + 1 ); /* don't forget the \0 */
9791012
9801013 memcpy (dataptr , ptr , len );
981- ptr += len ;
1014+ ptr += MAXALIGN ( len ) ;
9821015
9831016 /* just point into the array */
9841017 map [dim ][i ] = PointerGetDatum (dataptr );
985- dataptr += len ;
1018+ dataptr += MAXALIGN ( len ) ;
9861019 }
9871020 }
9881021
@@ -995,6 +1028,9 @@ statext_mcv_deserialize(bytea *data)
9951028
9961029 /* check we consumed input data for this dimension exactly */
9971030 Assert (ptr == (start + info [dim ].nbytes ));
1031+
1032+ /* ensure proper alignment of the data */
1033+ ptr = raw + MAXALIGN (ptr - raw );
9981034 }
9991035
10001036 /* we should have also filled the MCV list exactly */
@@ -1027,16 +1063,18 @@ statext_mcv_deserialize(bytea *data)
10271063 ptr += ITEM_SIZE (ndims );
10281064
10291065 /* check we're not overflowing the input */
1030- Assert (ptr <= (char * ) data + VARSIZE_ANY (data ));
1066+ Assert (ptr <= (char * ) raw + VARSIZE_ANY_EXHDR (data ));
10311067 }
10321068
10331069 /* check that we processed all the data */
1034- Assert (ptr == ( char * ) data + VARSIZE_ANY (data ));
1070+ Assert (ptr == raw + VARSIZE_ANY_EXHDR (data ));
10351071
10361072 /* release the buffers used for mapping */
10371073 for (dim = 0 ; dim < ndims ; dim ++ )
10381074 pfree (map [dim ]);
1075+
10391076 pfree (map );
1077+ pfree (raw );
10401078
10411079 return mcvlist ;
10421080}
0 commit comments