@@ -66,6 +66,7 @@ typedef struct
6666 bool collate_c ;
6767 hyperLogLogState abbr_card ; /* Abbreviated key cardinality state */
6868 hyperLogLogState full_card ; /* Full key cardinality state */
69+ double prop_card ; /* Required cardinality proportion */
6970#ifdef HAVE_LOCALE_T
7071 pg_locale_t locale ;
7172#endif
@@ -1845,6 +1846,7 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
18451846 */
18461847 if (abbreviate )
18471848 {
1849+ tss -> prop_card = 0.20 ;
18481850 initHyperLogLog (& tss -> abbr_card , 10 );
18491851 initHyperLogLog (& tss -> full_card , 10 );
18501852 ssup -> abbrev_full_comparator = ssup -> comparator ;
@@ -2125,7 +2127,7 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
21252127 Assert (ssup -> abbreviate );
21262128
21272129 /* Have a little patience */
2128- if (memtupcount < 20 )
2130+ if (memtupcount < 100 )
21292131 return false;
21302132
21312133 abbrev_distinct = estimateHyperLogLog (& tss -> abbr_card );
@@ -2151,8 +2153,9 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
21512153 {
21522154 double norm_abbrev_card = abbrev_distinct / (double ) memtupcount ;
21532155
2154- elog (DEBUG_elog_output , "abbrev_distinct after %d: %f (key_distinct: %f, norm_abbrev_card: %f)" ,
2155- memtupcount , abbrev_distinct , key_distinct , norm_abbrev_card );
2156+ elog (DEBUG_elog_output , "abbrev_distinct after %d: %f (key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)" ,
2157+ memtupcount , abbrev_distinct , key_distinct , norm_abbrev_card ,
2158+ tss -> prop_card );
21562159 }
21572160#endif
21582161
@@ -2172,8 +2175,38 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
21722175 * abbreviated comparison with a cheap memcmp()-based authoritative
21732176 * resolution are equivalent.
21742177 */
2175- if (abbrev_distinct > key_distinct * 0.05 )
2178+ if (abbrev_distinct > key_distinct * tss -> prop_card )
2179+ {
2180+ /*
2181+ * When we have exceeded 10,000 tuples, decay required cardinality
2182+ * aggressively for next call.
2183+ *
2184+ * This is useful because the number of comparisons required on average
2185+ * increases at a linearithmic rate, and at roughly 10,000 tuples that
2186+ * factor will start to dominate over the linear costs of string
2187+ * transformation (this is a conservative estimate). The decay rate is
2188+ * chosen to be a little less aggressive than halving -- which (since
2189+ * we're called at points at which memtupcount has doubled) would never
2190+ * see the cost model actually abort past the first call following a
2191+ * decay. This decay rate is mostly a precaution against a sudden,
2192+ * violent swing in how well abbreviated cardinality tracks full key
2193+ * cardinality. The decay also serves to prevent a marginal case from
2194+ * being aborted too late, when too much has already been invested in
2195+ * string transformation.
2196+ *
2197+ * It's possible for sets of several million distinct strings with mere
2198+ * tens of thousands of distinct abbreviated keys to still benefit very
2199+ * significantly. This will generally occur provided each abbreviated
2200+ * key is a proxy for a roughly uniform number of the set's full keys.
2201+ * If it isn't so, we hope to catch that early and abort. If it isn't
2202+ * caught early, by the time the problem is apparent it's probably not
2203+ * worth aborting.
2204+ */
2205+ if (memtupcount > 10000 )
2206+ tss -> prop_card *= 0.65 ;
2207+
21762208 return false;
2209+ }
21772210
21782211 /*
21792212 * Abort abbreviation strategy.
@@ -2187,8 +2220,8 @@ bttext_abbrev_abort(int memtupcount, SortSupport ssup)
21872220 * lose but much to gain, which our strategy reflects.
21882221 */
21892222#ifdef DEBUG_ABBREV_KEYS
2190- elog (DEBUG_elog_output , "would have aborted abbreviation due to worst-case at %d. abbrev_distinct: %f, key_distinct: %f" ,
2191- memtupcount , abbrev_distinct , key_distinct );
2223+ elog (DEBUG_elog_output , "would have aborted abbreviation due to worst-case at %d. abbrev_distinct: %f, key_distinct: %f, prop_card: %f " ,
2224+ memtupcount , abbrev_distinct , key_distinct , tss -> prop_card );
21922225 /* Actually abort only when debugging is disabled */
21932226 return false;
21942227#endif
0 commit comments