@@ -40,8 +40,8 @@ static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache,
4040 const RangeBound * constbound ,
4141 const RangeBound * hist , int hist_nvalues ,
4242 bool equal );
43- static int rbound_bsearch (TypeCacheEntry * typcache , const RangeBound * value ,
44- const RangeBound * hist , int hist_length , bool equal );
43+ static int rbound_bsearch (TypeCacheEntry * typcache , const RangeBound * value ,
44+ const RangeBound * hist , int hist_length , bool equal );
4545static float8 get_position (TypeCacheEntry * typcache , const RangeBound * value ,
4646 const RangeBound * hist1 , const RangeBound * hist2 );
4747static float8 get_len_position (double value , double hist1 , double hist2 );
@@ -400,6 +400,13 @@ calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
400400 ATTSTATSSLOT_VALUES )))
401401 return -1.0 ;
402402
403+ /* check that it's a histogram, not just a dummy entry */
404+ if (hslot .nvalues < 2 )
405+ {
406+ free_attstatsslot (& hslot );
407+ return -1.0 ;
408+ }
409+
403410 /*
404411 * Convert histogram of ranges into histograms of its lower and upper
405412 * bounds.
@@ -686,7 +693,8 @@ get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound
686693 /*
687694 * Both bounds are finite. Assuming the subtype's comparison function
688695 * works sanely, the value must be finite, too, because it lies
689- * somewhere between the bounds. If it doesn't, just return something.
696+ * somewhere between the bounds. If it doesn't, arbitrarily return
697+ * 0.5.
690698 */
691699 if (value -> infinite )
692700 return 0.5 ;
@@ -696,21 +704,22 @@ get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound
696704 return 0.5 ;
697705
698706 /* Calculate relative position using subdiff function. */
699- bin_width = DatumGetFloat8 (FunctionCall2Coll (
700- & typcache -> rng_subdiff_finfo ,
707+ bin_width = DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
701708 typcache -> rng_collation ,
702709 hist2 -> val ,
703710 hist1 -> val ));
704- if (bin_width <= 0.0 )
705- return 0.5 ; /* zero width bin */
711+ if (isnan ( bin_width ) || bin_width <= 0.0 )
712+ return 0.5 ; /* punt for NaN or zero- width bin */
706713
707- position = DatumGetFloat8 (FunctionCall2Coll (
708- & typcache -> rng_subdiff_finfo ,
714+ position = DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
709715 typcache -> rng_collation ,
710716 value -> val ,
711717 hist1 -> val ))
712718 / bin_width ;
713719
720+ if (isnan (position ))
721+ return 0.5 ; /* punt for NaN from subdiff, Inf/Inf, etc */
722+
714723 /* Relative position must be in [0,1] range */
715724 position = Max (position , 0.0 );
716725 position = Min (position , 1.0 );
@@ -802,15 +811,23 @@ get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBoun
802811 if (!bound1 -> infinite && !bound2 -> infinite )
803812 {
804813 /*
805- * No bounds are infinite, use subdiff function or return default
814+ * Neither bound is infinite, use subdiff function or return default
806815 * value of 1.0 if no subdiff is available.
807816 */
808817 if (has_subdiff )
809- return
810- DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
811- typcache -> rng_collation ,
812- bound2 -> val ,
813- bound1 -> val ));
818+ {
819+ float8 res ;
820+
821+ res = DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
822+ typcache -> rng_collation ,
823+ bound2 -> val ,
824+ bound1 -> val ));
825+ /* Reject possible NaN result, also negative result */
826+ if (isnan (res ) || res < 0.0 )
827+ return 1.0 ;
828+ else
829+ return res ;
830+ }
814831 else
815832 return 1.0 ;
816833 }
@@ -824,7 +841,7 @@ get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBoun
824841 }
825842 else
826843 {
827- /* One bound is infinite, another is not */
844+ /* One bound is infinite, the other is not */
828845 return get_float8_infinity ();
829846 }
830847}
@@ -1020,17 +1037,31 @@ calc_hist_selectivity_contained(TypeCacheEntry *typcache,
10201037 upper_index = rbound_bsearch (typcache , upper , hist_lower , hist_nvalues ,
10211038 false);
10221039
1040+ /*
1041+ * If the upper bound value is below the histogram's lower limit, there
1042+ * are no matches.
1043+ */
1044+ if (upper_index < 0 )
1045+ return 0.0 ;
1046+
1047+ /*
1048+ * If the upper bound value is at or beyond the histogram's upper limit,
1049+ * start our loop at the last actual bin, as though the upper bound were
1050+ * within that bin; get_position will clamp its result to 1.0 anyway.
1051+ * (This corresponds to assuming that the data population above the
1052+ * histogram's upper limit is empty, exactly like what we just assumed for
1053+ * the lower limit.)
1054+ */
1055+ upper_index = Min (upper_index , hist_nvalues - 2 );
1056+
10231057 /*
10241058 * Calculate upper_bin_width, ie. the fraction of the (upper_index,
10251059 * upper_index + 1) bin which is greater than upper bound of query range
10261060 * using linear interpolation of subdiff function.
10271061 */
1028- if (upper_index >= 0 && upper_index < hist_nvalues - 1 )
1029- upper_bin_width = get_position (typcache , upper ,
1030- & hist_lower [upper_index ],
1031- & hist_lower [upper_index + 1 ]);
1032- else
1033- upper_bin_width = 0.0 ;
1062+ upper_bin_width = get_position (typcache , upper ,
1063+ & hist_lower [upper_index ],
1064+ & hist_lower [upper_index + 1 ]);
10341065
10351066 /*
10361067 * In the loop, dist and prev_dist are the distance of the "current" bin's
@@ -1103,9 +1134,6 @@ calc_hist_selectivity_contained(TypeCacheEntry *typcache,
11031134 * of ranges that contain the constant lower and upper bounds. This uses
11041135 * the histograms of range lower bounds and range lengths, on the assumption
11051136 * that the range lengths are independent of the lower bounds.
1106- *
1107- * Note, this is "var @> const", ie. estimate the fraction of ranges that
1108- * contain the constant lower and upper bounds.
11091137 */
11101138static double
11111139calc_hist_selectivity_contains (TypeCacheEntry * typcache ,
@@ -1124,16 +1152,30 @@ calc_hist_selectivity_contains(TypeCacheEntry *typcache,
11241152 lower_index = rbound_bsearch (typcache , lower , hist_lower , hist_nvalues ,
11251153 true);
11261154
1155+ /*
1156+ * If the lower bound value is below the histogram's lower limit, there
1157+ * are no matches.
1158+ */
1159+ if (lower_index < 0 )
1160+ return 0.0 ;
1161+
1162+ /*
1163+ * If the lower bound value is at or beyond the histogram's upper limit,
1165+ * start our loop at the last actual bin, as though the lower bound were
1165+ * within that bin; get_position will clamp its result to 1.0 anyway.
1166+ * (This corresponds to assuming that the data population above the
1167+ * histogram's upper limit is empty, exactly like what we just assumed for
1168+ * the lower limit.)
1169+ */
1170+ lower_index = Min (lower_index , hist_nvalues - 2 );
1171+
11271172 /*
11281173 * Calculate lower_bin_width, ie. the fraction of the (lower_index,
11291174 * lower_index + 1) bin which is greater than lower bound of query range
11301175 * using linear interpolation of subdiff function.
11311176 */
1132- if (lower_index >= 0 && lower_index < hist_nvalues - 1 )
1133- lower_bin_width = get_position (typcache , lower , & hist_lower [lower_index ],
1134- & hist_lower [lower_index + 1 ]);
1135- else
1136- lower_bin_width = 0.0 ;
1177+ lower_bin_width = get_position (typcache , lower , & hist_lower [lower_index ],
1178+ & hist_lower [lower_index + 1 ]);
11371179
11381180 /*
11391181 * Loop through all the lower bound bins, smaller than the query lower
0 commit comments