6161#define ITEM_FREQUENCY (item ,ndims ) ((double *) (ITEM_NULLS(item, ndims) + (ndims)))
6262#define ITEM_BASE_FREQUENCY (item ,ndims ) ((double *) (ITEM_FREQUENCY(item, ndims) + 1))
6363
64+ /*
65+ * Used to compute size of serialized MCV list representation.
 *
 * MinSizeOfMCVList is the size of the varlena header plus the fixed header
 * fields, which are serialized one by one: three uint32 values and one
 * AttrNumber.
 *
 * SizeOfMCVList(ndims, nitems) adds the per-dimension type Oids to that
 * header, then the DimensionInfo array and the items, MAXALIGN-ing each of
 * the three parts. It does not include the deduplicated values, whose size
 * depends on the data.
66+ */
67+ #define MinSizeOfMCVList \
68+ (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
69+
70+ #define SizeOfMCVList (ndims ,nitems ) \
71+ (MAXALIGN(MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
72+ MAXALIGN((ndims) * sizeof(DimensionInfo)) + \
73+ MAXALIGN((nitems) * ITEM_SIZE(ndims)))
6474
6575static MultiSortSupport build_mss (VacAttrStats * * stats , int numattrs );
6676
@@ -491,7 +501,6 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
491501 char * item = palloc0 (itemsize );
492502
493503 /* serialized items (indexes into arrays, etc.) */
494- bytea * output ;
495504 char * raw ;
496505 char * ptr ;
497506
@@ -625,27 +634,53 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
625634 * Now we can finally compute how much space we'll actually need for the
626635 * whole serialized MCV list (varlena header, MCV header, dimension info
627636 * for each attribute, deduplicated values and items).
637+ *
638+ * The header fields are copied one by one, so that we don't need any
639+ * explicit alignment (we copy them while deserializing). All fields
640+ * after this need to be properly aligned, for direct access.
628641 */
629- total_length = offsetof(MCVList , items )
630- + MAXALIGN (ndims * sizeof (DimensionInfo ));
642+ total_length = MAXALIGN (VARHDRSZ + (3 * sizeof (uint32 ))
643+ + sizeof (AttrNumber ) + (ndims * sizeof (Oid )));
644+
645+ /* dimension info */
646+ total_length += MAXALIGN (ndims * sizeof (DimensionInfo ));
631647
632648 /* add space for the arrays of deduplicated values */
633649 for (i = 0 ; i < ndims ; i ++ )
634650 total_length += MAXALIGN (info [i ].nbytes );
635651
636- /* and finally the items (no additional alignment needed) */
652+ /*
653+ * And finally the items (no additional alignment needed, we start
654+ * at proper alignment and the itemsize formula uses MAXALIGN)
655+ */
637656 total_length += mcvlist -> nitems * itemsize ;
638657
639658 /*
640659 * Allocate space for the whole serialized MCV list (we'll skip bytes,
641660 * so we set them to zero to make the result more compressible).
642661 */
643662 raw = palloc0 (total_length );
644- ptr = raw ;
663+ SET_VARSIZE (raw , total_length );
664+ ptr = VARDATA (raw );
665+
666+ /* copy the MCV list header fields, one by one */
667+ memcpy (ptr , & mcvlist -> magic , sizeof (uint32 ));
668+ ptr += sizeof (uint32 );
669+
670+ memcpy (ptr , & mcvlist -> type , sizeof (uint32 ));
671+ ptr += sizeof (uint32 );
672+
673+ memcpy (ptr , & mcvlist -> nitems , sizeof (uint32 ));
674+ ptr += sizeof (uint32 );
675+
676+ memcpy (ptr , & mcvlist -> ndimensions , sizeof (AttrNumber ));
677+ ptr += sizeof (AttrNumber );
645678
646- /* copy the MCV list header */
647- memcpy (ptr , mcvlist , offsetof(MCVList , items ));
648- ptr += offsetof(MCVList , items );
679+ memcpy (ptr , mcvlist -> types , sizeof (Oid ) * ndims );
680+ ptr += (sizeof (Oid ) * ndims );
681+
682+ /* the header may not be exactly aligned, so make sure it is */
683+ ptr = raw + MAXALIGN (ptr - raw );
649684
650685 /* store information about the attributes */
651686 memcpy (ptr , info , sizeof (DimensionInfo ) * ndims );
@@ -761,14 +796,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
761796 pfree (values );
762797 pfree (counts );
763798
764- output = (bytea * ) palloc (VARHDRSZ + total_length );
765- SET_VARSIZE (output , VARHDRSZ + total_length );
766-
767- memcpy (VARDATA_ANY (output ), raw , total_length );
768-
769- pfree (raw );
770-
771- return output ;
799+ return (bytea * ) raw ;
772800}
773801
774802/*
@@ -789,8 +817,7 @@ statext_mcv_deserialize(bytea *data)
789817 char * ptr ;
790818
791819 int ndims ,
792- nitems ,
793- itemsize ;
820+ nitems ;
794821 DimensionInfo * info = NULL ;
795822
796823 /* local allocation buffer (used only for deserialization) */
@@ -810,24 +837,32 @@ statext_mcv_deserialize(bytea *data)
810837
811838 /*
812839 * We can't possibly deserialize a MCV list if there's not even a complete
813- * header.
840+ * header. We need an explicit formula here, because we serialize the
841+ * header fields one by one, so we need to ignore struct alignment.
814842 */
815- if (VARSIZE_ANY_EXHDR (data ) < offsetof( MCVList , items ) )
843+ if (VARSIZE_ANY (data ) < MinSizeOfMCVList )
816844 elog (ERROR , "invalid MCV size %zd (expected at least %zu)" ,
817- VARSIZE_ANY_EXHDR (data ), offsetof( MCVList , items ) );
845+ VARSIZE_ANY (data ), MinSizeOfMCVList );
818846
819847 /* read the MCV list header */
820848 mcvlist = (MCVList * ) palloc0 (offsetof(MCVList , items ));
821849
822- /* initialize pointer to the data part (skip the varlena header) */
823- raw = palloc (VARSIZE_ANY_EXHDR (data ));
824- ptr = raw ;
825-
826- memcpy (raw , VARDATA_ANY (data ), VARSIZE_ANY_EXHDR (data ));
850+ /* pointer to the data part (skip the varlena header) */
851+ ptr = VARDATA_ANY (data );
852+ raw = (char * ) data ;
827853
828854 /* get the header and perform further sanity checks */
829- memcpy (mcvlist , ptr , offsetof(MCVList , items ));
830- ptr += offsetof(MCVList , items );
855+ memcpy (& mcvlist -> magic , ptr , sizeof (uint32 ));
856+ ptr += sizeof (uint32 );
857+
858+ memcpy (& mcvlist -> type , ptr , sizeof (uint32 ));
859+ ptr += sizeof (uint32 );
860+
861+ memcpy (& mcvlist -> nitems , ptr , sizeof (uint32 ));
862+ ptr += sizeof (uint32 );
863+
864+ memcpy (& mcvlist -> ndimensions , ptr , sizeof (AttrNumber ));
865+ ptr += sizeof (AttrNumber );
831866
832867 if (mcvlist -> magic != STATS_MCV_MAGIC )
833868 elog (ERROR , "invalid MCV magic %u (expected %u)" ,
@@ -852,25 +887,29 @@ statext_mcv_deserialize(bytea *data)
852887
853888 nitems = mcvlist -> nitems ;
854889 ndims = mcvlist -> ndimensions ;
855- itemsize = ITEM_SIZE (ndims );
856890
857891 /*
858892 * Check amount of data including DimensionInfo for all dimensions and
859893 * also the serialized items (including uint16 indexes). Also, walk
860894 * through the dimension information and add it to the sum.
861895 */
862- expected_size = offsetof(MCVList , items ) +
863- ndims * sizeof (DimensionInfo ) +
864- (nitems * itemsize );
896+ expected_size = SizeOfMCVList (ndims , nitems );
865897
866898 /*
867899 * Check that we have at least the dimension and info records, along with
868900 * the items. We don't know the size of the serialized values yet. We need
869901 * to do this check first, before accessing the dimension info.
870902 */
871- if (VARSIZE_ANY_EXHDR (data ) < expected_size )
903+ if (VARSIZE_ANY (data ) < expected_size )
872904 elog (ERROR , "invalid MCV size %zd (expected %zu)" ,
873- VARSIZE_ANY_EXHDR (data ), expected_size );
905+ VARSIZE_ANY (data ), expected_size );
906+
907+ /* Now copy the array of type Oids from the serialized data into the MCV list. */
908+ memcpy (mcvlist -> types , ptr , sizeof (Oid ) * ndims );
909+ ptr += (sizeof (Oid ) * ndims );
910+
911+ /* ensure alignment of the pointer (after the header fields) */
912+ ptr = raw + MAXALIGN (ptr - raw );
874913
875914 /* Now it's safe to access the dimension info. */
876915 info = (DimensionInfo * ) ptr ;
@@ -894,9 +933,9 @@ statext_mcv_deserialize(bytea *data)
895934 * (header, dimension info, items and deduplicated data). So do the final
895934 * check on size.
896935 */
897- if (VARSIZE_ANY_EXHDR (data ) != expected_size )
936+ if (VARSIZE_ANY (data ) != expected_size )
898937 elog (ERROR , "invalid MCV size %zd (expected %zu)" ,
899- VARSIZE_ANY_EXHDR (data ), expected_size );
938+ VARSIZE_ANY (data ), expected_size );
900939
901940 /*
902941 * We need an array of Datum values for each dimension, so that we can
@@ -1063,18 +1102,17 @@ statext_mcv_deserialize(bytea *data)
10631102 ptr += ITEM_SIZE (ndims );
10641103
10651104 /* check we're not overflowing the input */
1066- Assert (ptr <= (char * ) raw + VARSIZE_ANY_EXHDR (data ));
1105+ Assert (ptr <= (char * ) raw + VARSIZE_ANY (data ));
10671106 }
10681107
10691108 /* check that we processed all the data */
1070- Assert (ptr == raw + VARSIZE_ANY_EXHDR (data ));
1109+ Assert (ptr == raw + VARSIZE_ANY (data ));
10711110
10721111 /* release the buffers used for mapping */
10731112 for (dim = 0 ; dim < ndims ; dim ++ )
10741113 pfree (map [dim ]);
10751114
10761115 pfree (map );
1077- pfree (raw );
10781116
10791117 return mcvlist ;
10801118}
0 commit comments