@@ -1689,10 +1689,14 @@ typedef struct
16891689} CompareScalarsContext ;
16901690
16911691
1692- static void compute_minimal_stats (VacAttrStatsP stats ,
1692+ static void compute_trivial_stats (VacAttrStatsP stats ,
16931693 AnalyzeAttrFetchFunc fetchfunc ,
16941694 int samplerows ,
16951695 double totalrows );
1696+ static void compute_distinct_stats (VacAttrStatsP stats ,
1697+ AnalyzeAttrFetchFunc fetchfunc ,
1698+ int samplerows ,
1699+ double totalrows );
16961700static void compute_scalar_stats (VacAttrStatsP stats ,
16971701 AnalyzeAttrFetchFunc fetchfunc ,
16981702 int samplerows ,
@@ -1723,21 +1727,17 @@ std_typanalyze(VacAttrStats *stats)
17231727 & ltopr , & eqopr , NULL ,
17241728 NULL );
17251729
1726- /* If column has no "=" operator, we can't do much of anything */
1727- if (!OidIsValid (eqopr ))
1728- return false;
1729-
17301730 /* Save the operator info for compute_stats routines */
17311731 mystats = (StdAnalyzeData * ) palloc (sizeof (StdAnalyzeData ));
17321732 mystats -> eqopr = eqopr ;
1733- mystats -> eqfunc = get_opcode (eqopr );
1733+ mystats -> eqfunc = OidIsValid ( eqopr ) ? get_opcode (eqopr ) : InvalidOid ;
17341734 mystats -> ltopr = ltopr ;
17351735 stats -> extra_data = mystats ;
17361736
17371737 /*
17381738 * Determine which standard statistics algorithm to use
17391739 */
1740- if (OidIsValid (ltopr ))
1740+ if (OidIsValid (eqopr ) && OidIsValid ( ltopr ))
17411741 {
17421742 /* Seems to be a scalar datatype */
17431743 stats -> compute_stats = compute_scalar_stats ;
@@ -1762,19 +1762,109 @@ std_typanalyze(VacAttrStats *stats)
17621762 */
17631763 stats -> minrows = 300 * attr -> attstattarget ;
17641764 }
1765+ else if (OidIsValid (eqopr ))
1766+ {
1767+ /* We can still recognize distinct values */
1768+ stats -> compute_stats = compute_distinct_stats ;
1769+ /* Might as well use the same minrows as above */
1770+ stats -> minrows = 300 * attr -> attstattarget ;
1771+ }
17651772 else
17661773 {
1767- /* Can't do much but the minimal stuff */
1768- stats -> compute_stats = compute_minimal_stats ;
1774+ /* Can't do much but the trivial stuff */
1775+ stats -> compute_stats = compute_trivial_stats ;
17691776 /* Might as well use the same minrows as above */
17701777 stats -> minrows = 300 * attr -> attstattarget ;
17711778 }
17721779
17731780 return true;
17741781}
17751782
1783+
1784+ /*
1785+ * compute_trivial_stats() -- compute very basic column statistics
1786+ *
1787+ * We use this when we cannot find any "=" operator for the datatype.
1788+ *
1789+ * We determine the fraction of null rows and the average datum width.
1790+ */
1791+ static void
1792+ compute_trivial_stats (VacAttrStatsP stats ,
1793+ AnalyzeAttrFetchFunc fetchfunc ,
1794+ int samplerows ,
1795+ double totalrows )
1796+ {
1797+ int i ;
1798+ int null_cnt = 0 ;
1799+ int nonnull_cnt = 0 ;
1800+ double total_width = 0 ;
1801+ bool is_varlena = (!stats -> attrtype -> typbyval &&
1802+ stats -> attrtype -> typlen == -1 );
1803+ bool is_varwidth = (!stats -> attrtype -> typbyval &&
1804+ stats -> attrtype -> typlen < 0 );
1805+
1806+ for (i = 0 ; i < samplerows ; i ++ )
1807+ {
1808+ Datum value ;
1809+ bool isnull ;
1810+
1811+ vacuum_delay_point ();
1812+
1813+ value = fetchfunc (stats , i , & isnull );
1814+
1815+ /* Check for null/nonnull */
1816+ if (isnull )
1817+ {
1818+ null_cnt ++ ;
1819+ continue ;
1820+ }
1821+ nonnull_cnt ++ ;
1822+
1823+ /*
1824+ * If it's a variable-width field, add up widths for average width
1825+ * calculation. Note that if the value is toasted, we use the toasted
1826+ * width. We don't bother with this calculation if it's a fixed-width
1827+ * type.
1828+ */
1829+ if (is_varlena )
1830+ {
1831+ total_width += VARSIZE_ANY (DatumGetPointer (value ));
1832+ }
1833+ else if (is_varwidth )
1834+ {
1835+ /* must be cstring */
1836+ total_width += strlen (DatumGetCString (value )) + 1 ;
1837+ }
1838+ }
1839+
1840+ /* We can only compute average width if we found some non-null values. */
1841+ if (nonnull_cnt > 0 )
1842+ {
1843+ stats -> stats_valid = true;
1844+ /* Do the simple null-frac and width stats */
1845+ stats -> stanullfrac = (double ) null_cnt / (double ) samplerows ;
1846+ if (is_varwidth )
1847+ stats -> stawidth = total_width / (double ) nonnull_cnt ;
1848+ else
1849+ stats -> stawidth = stats -> attrtype -> typlen ;
1850+ stats -> stadistinct = 0.0 ; /* "unknown" */
1851+ }
1852+ else if (null_cnt > 0 )
1853+ {
1854+ /* We found only nulls; assume the column is entirely null */
1855+ stats -> stats_valid = true;
1856+ stats -> stanullfrac = 1.0 ;
1857+ if (is_varwidth )
1858+ stats -> stawidth = 0 ; /* "unknown" */
1859+ else
1860+ stats -> stawidth = stats -> attrtype -> typlen ;
1861+ stats -> stadistinct = 0.0 ; /* "unknown" */
1862+ }
1863+ }
1864+
1865+
17761866/*
1777- * compute_minimal_stats () -- compute minimal column statistics
1867+ * compute_distinct_stats () -- compute column statistics including ndistinct
17781868 *
17791869 * We use this when we can find only an "=" operator for the datatype.
17801870 *
@@ -1789,10 +1879,10 @@ std_typanalyze(VacAttrStats *stats)
17891879 * depend mainly on the length of the list we are willing to keep.
17901880 */
17911881static void
1792- compute_minimal_stats (VacAttrStatsP stats ,
1793- AnalyzeAttrFetchFunc fetchfunc ,
1794- int samplerows ,
1795- double totalrows )
1882+ compute_distinct_stats (VacAttrStatsP stats ,
1883+ AnalyzeAttrFetchFunc fetchfunc ,
1884+ int samplerows ,
1885+ double totalrows )
17961886{
17971887 int i ;
17981888 int null_cnt = 0 ;
0 commit comments