@@ -192,6 +192,10 @@ static void examine_simple_variable(PlannerInfo *root, Var *var,
192192static bool get_variable_range (PlannerInfo * root , VariableStatData * vardata ,
193193 Oid sortop , Oid collation ,
194194 Datum * min , Datum * max );
195+ static void get_stats_slot_range (AttStatsSlot * sslot ,
196+ Oid opfuncoid , FmgrInfo * opproc ,
197+ Oid collation , int16 typLen , bool typByVal ,
198+ Datum * min , Datum * max , bool * p_have_data );
195199static bool get_actual_variable_range (PlannerInfo * root ,
196200 VariableStatData * vardata ,
197201 Oid sortop , Oid collation ,
@@ -679,7 +683,7 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
679683 * compute the resulting contribution to selectivity.
680684 */
681685 hist_selec = ineq_histogram_selectivity (root , vardata ,
682- & opproc , isgt , iseq ,
686+ operator , & opproc , isgt , iseq ,
683687 collation ,
684688 constval , consttype );
685689
@@ -1019,6 +1023,9 @@ generic_restriction_selectivity(PlannerInfo *root, Oid oproid, Oid collation,
10191023 * satisfies the inequality condition, ie, VAR < (or <=, >, >=) CONST.
10201024 * The isgt and iseq flags distinguish which of the four cases apply.
10211025 *
1026+ * While opproc could be looked up from the operator OID, common callers
1027+ * also need to call it separately, so we make the caller pass both.
1028+ *
10221029 * Returns -1 if there is no histogram (valid results will always be >= 0).
10231030 *
10241031 * Note that the result disregards both the most-common-values (if any) and
@@ -1030,7 +1037,7 @@ generic_restriction_selectivity(PlannerInfo *root, Oid oproid, Oid collation,
10301037double
10311038ineq_histogram_selectivity (PlannerInfo * root ,
10321039 VariableStatData * vardata ,
1033- FmgrInfo * opproc , bool isgt , bool iseq ,
1040+ Oid opoid , FmgrInfo * opproc , bool isgt , bool iseq ,
10341041 Oid collation ,
10351042 Datum constval , Oid consttype )
10361043{
@@ -1042,31 +1049,30 @@ ineq_histogram_selectivity(PlannerInfo *root,
10421049 /*
10431050 * Someday, ANALYZE might store more than one histogram per rel/att,
10441051 * corresponding to more than one possible sort ordering defined for the
1045- * column type. However, to make that work we will need to figure out
1046- * which staop to search for --- it's not necessarily the one we have at
1047- * hand! (For example, we might have a '<=' operator rather than the '<'
1048- * operator that will appear in staop.) The collation might not agree
1049- * either. For now, just assume that whatever appears in pg_statistic is
1050- * sorted the same way our operator sorts, or the reverse way if isgt is
1051- * true. This could result in a bogus estimate, but it still seems better
1052- * than falling back to the default estimate .
1052+ * column type. Right now, we know there is only one, so just grab it and
1053+ * see if it matches the query.
1054+ *
1055+ * Note that we can't use opoid as search argument; the staop appearing in
1056+ * pg_statistic will be for the relevant '<' operator, but what we have
1057+ * might be some other inequality operator such as '>='. (Even if opoid
1058+ * is a '<' operator, it could be cross-type.) Hence we must use
1059+ * comparison_ops_are_compatible() to see if the operators match .
10531060 */
10541061 if (HeapTupleIsValid (vardata -> statsTuple ) &&
10551062 statistic_proc_security_check (vardata , opproc -> fn_oid ) &&
10561063 get_attstatsslot (& sslot , vardata -> statsTuple ,
10571064 STATISTIC_KIND_HISTOGRAM , InvalidOid ,
10581065 ATTSTATSSLOT_VALUES ))
10591066 {
1060- if (sslot .nvalues > 1 )
1067+ if (sslot .nvalues > 1 &&
1068+ sslot .stacoll == collation &&
1069+ comparison_ops_are_compatible (sslot .staop , opoid ))
10611070 {
10621071 /*
10631072 * Use binary search to find the desired location, namely the
10641073 * right end of the histogram bin containing the comparison value,
10651074 * which is the leftmost entry for which the comparison operator
1066- * succeeds (if isgt) or fails (if !isgt). (If the given operator
1067- * isn't actually sort-compatible with the histogram, you'll get
1068- * garbage results ... but probably not any more garbage-y than
1069- * you would have from the old linear search.)
1075+ * succeeds (if isgt) or fails (if !isgt).
10701076 *
10711077 * In this loop, we pay no attention to whether the operator iseq
10721078 * or not; that detail will be mopped up below. (We cannot tell,
@@ -1332,6 +1338,50 @@ ineq_histogram_selectivity(PlannerInfo *root,
13321338 hist_selec = 1.0 - cutoff ;
13331339 }
13341340 }
1341+ else if (sslot .nvalues > 1 )
1342+ {
1343+ /*
1344+ * If we get here, we have a histogram but it's not sorted the way
1345+ * we want. Do a brute-force search to see how many of the
1346+ * entries satisfy the comparison condition, and take that
1347+ * fraction as our estimate. (This is identical to the inner loop
1348+ * of histogram_selectivity; maybe share code?)
1349+ */
1350+ LOCAL_FCINFO (fcinfo , 2 );
1351+ int nmatch = 0 ;
1352+
1353+ InitFunctionCallInfoData (* fcinfo , opproc , 2 , collation ,
1354+ NULL , NULL );
1355+ fcinfo -> args [0 ].isnull = false;
1356+ fcinfo -> args [1 ].isnull = false;
1357+ fcinfo -> args [1 ].value = constval ;
1358+ for (int i = 0 ; i < sslot .nvalues ; i ++ )
1359+ {
1360+ Datum fresult ;
1361+
1362+ fcinfo -> args [0 ].value = sslot .values [i ];
1363+ fcinfo -> isnull = false;
1364+ fresult = FunctionCallInvoke (fcinfo );
1365+ if (!fcinfo -> isnull && DatumGetBool (fresult ))
1366+ nmatch ++ ;
1367+ }
1368+ hist_selec = ((double ) nmatch ) / ((double ) sslot .nvalues );
1369+
1370+ /*
1371+ * As above, clamp to a hundredth of the histogram resolution.
1372+ * This case is surely even less trustworthy than the normal one,
1373+ * so we shouldn't believe exact 0 or 1 selectivity. (Maybe the
1374+ * clamp should be more restrictive in this case?)
1375+ */
1376+ {
1377+ double cutoff = 0.01 / (double ) (sslot .nvalues - 1 );
1378+
1379+ if (hist_selec < cutoff )
1380+ hist_selec = cutoff ;
1381+ else if (hist_selec > 1.0 - cutoff )
1382+ hist_selec = 1.0 - cutoff ;
1383+ }
1384+ }
13351385
13361386 free_attstatsslot (& sslot );
13371387 }
@@ -5363,8 +5413,8 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
53635413 int16 typLen ;
53645414 bool typByVal ;
53655415 Oid opfuncoid ;
5416+ FmgrInfo opproc ;
53665417 AttStatsSlot sslot ;
5367- int i ;
53685418
53695419 /*
53705420 * XXX It's very tempting to try to use the actual column min and max, if
@@ -5395,78 +5445,56 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
53955445 (opfuncoid = get_opcode (sortop ))))
53965446 return false;
53975447
5448+ opproc .fn_oid = InvalidOid ; /* mark this as not looked up yet */
5449+
53985450 get_typlenbyval (vardata -> atttype , & typLen , & typByVal );
53995451
54005452 /*
5401- * If there is a histogram, grab the first and last values.
5402- *
5403- * If there is a histogram that is sorted with some other operator than
5404- * the one we want, fail --- this suggests that there is data we can't
5405- * use. XXX consider collation too.
5453+ * If there is a histogram with the ordering we want, grab the first and
5454+ * last values.
54065455 */
54075456 if (get_attstatsslot (& sslot , vardata -> statsTuple ,
54085457 STATISTIC_KIND_HISTOGRAM , sortop ,
54095458 ATTSTATSSLOT_VALUES ))
54105459 {
5411- if (sslot .nvalues > 0 )
5460+ if (sslot .stacoll == collation && sslot . nvalues > 0 )
54125461 {
54135462 tmin = datumCopy (sslot .values [0 ], typByVal , typLen );
54145463 tmax = datumCopy (sslot .values [sslot .nvalues - 1 ], typByVal , typLen );
54155464 have_data = true;
54165465 }
54175466 free_attstatsslot (& sslot );
54185467 }
5419- else if (get_attstatsslot (& sslot , vardata -> statsTuple ,
5420- STATISTIC_KIND_HISTOGRAM , InvalidOid ,
5421- 0 ))
5468+
5469+ /*
5470+ * Otherwise, if there is a histogram with some other ordering, scan it
5471+ * and get the min and max values according to the ordering we want. This
5472+ * of course may not find values that are really extremal according to our
5473+ * ordering, but it beats ignoring available data.
5474+ */
5475+ if (!have_data &&
5476+ get_attstatsslot (& sslot , vardata -> statsTuple ,
5477+ STATISTIC_KIND_HISTOGRAM , InvalidOid ,
5478+ ATTSTATSSLOT_VALUES ))
54225479 {
5480+ get_stats_slot_range (& sslot , opfuncoid , & opproc ,
5481+ collation , typLen , typByVal ,
5482+ & tmin , & tmax , & have_data );
54235483 free_attstatsslot (& sslot );
5424- return false;
54255484 }
54265485
54275486 /*
54285487 * If we have most-common-values info, look for extreme MCVs. This is
54295488 * needed even if we also have a histogram, since the histogram excludes
5430- * the MCVs. However, usually the MCVs will not be the extreme values, so
5431- * avoid unnecessary data copying.
5489+ * the MCVs.
54325490 */
54335491 if (get_attstatsslot (& sslot , vardata -> statsTuple ,
54345492 STATISTIC_KIND_MCV , InvalidOid ,
54355493 ATTSTATSSLOT_VALUES ))
54365494 {
5437- bool tmin_is_mcv = false;
5438- bool tmax_is_mcv = false;
5439- FmgrInfo opproc ;
5440-
5441- fmgr_info (opfuncoid , & opproc );
5442-
5443- for (i = 0 ; i < sslot .nvalues ; i ++ )
5444- {
5445- if (!have_data )
5446- {
5447- tmin = tmax = sslot .values [i ];
5448- tmin_is_mcv = tmax_is_mcv = have_data = true;
5449- continue ;
5450- }
5451- if (DatumGetBool (FunctionCall2Coll (& opproc ,
5452- collation ,
5453- sslot .values [i ], tmin )))
5454- {
5455- tmin = sslot .values [i ];
5456- tmin_is_mcv = true;
5457- }
5458- if (DatumGetBool (FunctionCall2Coll (& opproc ,
5459- collation ,
5460- tmax , sslot .values [i ])))
5461- {
5462- tmax = sslot .values [i ];
5463- tmax_is_mcv = true;
5464- }
5465- }
5466- if (tmin_is_mcv )
5467- tmin = datumCopy (tmin , typByVal , typLen );
5468- if (tmax_is_mcv )
5469- tmax = datumCopy (tmax , typByVal , typLen );
5495+ get_stats_slot_range (& sslot , opfuncoid , & opproc ,
5496+ collation , typLen , typByVal ,
5497+ & tmin , & tmax , & have_data );
54705498 free_attstatsslot (& sslot );
54715499 }
54725500
@@ -5475,6 +5503,62 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
54755503 return have_data ;
54765504}
54775505
5506+ /*
5507+ * get_stats_slot_range: scan sslot for min/max values
5508+ *
5509+ * Subroutine for get_variable_range: update min/max/have_data according
5510+ * to what we find in the statistics array.
5511+ */
5512+ static void
5513+ get_stats_slot_range (AttStatsSlot * sslot , Oid opfuncoid , FmgrInfo * opproc ,
5514+ Oid collation , int16 typLen , bool typByVal ,
5515+ Datum * min , Datum * max , bool * p_have_data )
5516+ {
5517+ Datum tmin = * min ;
5518+ Datum tmax = * max ;
5519+ bool have_data = * p_have_data ;
5520+ bool found_tmin = false;
5521+ bool found_tmax = false;
5522+
5523+ /* Look up the comparison function, if we didn't already do so */
5524+ if (opproc -> fn_oid != opfuncoid )
5525+ fmgr_info (opfuncoid , opproc );
5526+
5527+ /* Scan all the slot's values */
5528+ for (int i = 0 ; i < sslot -> nvalues ; i ++ )
5529+ {
5530+ if (!have_data )
5531+ {
5532+ tmin = tmax = sslot -> values [i ];
5533+ found_tmin = found_tmax = true;
5534+ * p_have_data = have_data = true;
5535+ continue ;
5536+ }
5537+ if (DatumGetBool (FunctionCall2Coll (opproc ,
5538+ collation ,
5539+ sslot -> values [i ], tmin )))
5540+ {
5541+ tmin = sslot -> values [i ];
5542+ found_tmin = true;
5543+ }
5544+ if (DatumGetBool (FunctionCall2Coll (opproc ,
5545+ collation ,
5546+ tmax , sslot -> values [i ])))
5547+ {
5548+ tmax = sslot -> values [i ];
5549+ found_tmax = true;
5550+ }
5551+ }
5552+
5553+ /*
5554+ * Copy the slot's values, if we found new extreme values.
5555+ */
5556+ if (found_tmin )
5557+ * min = datumCopy (tmin , typByVal , typLen );
5558+ if (found_tmax )
5559+ * max = datumCopy (tmax , typByVal , typLen );
5560+ }
5561+
54785562
54795563/*
54805564 * get_actual_variable_range
0 commit comments