@@ -44,6 +44,12 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
4444 bool varonleft , bool isLTsel , Selectivity s2 );
4545static RelOptInfo * find_single_rel_for_clauses (PlannerInfo * root ,
4646 List * clauses );
47+ static Selectivity clauselist_selectivity_or (PlannerInfo * root ,
48+ List * clauses ,
49+ int varRelid ,
50+ JoinType jointype ,
51+ SpecialJoinInfo * sjinfo ,
52+ bool use_extended_stats );
4753
4854/****************************************************************************
4955 * ROUTINES TO COMPUTE SELECTIVITIES
@@ -61,64 +67,10 @@ static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
6167 *
6268 * The basic approach is to apply extended statistics first, on as many
6369 * clauses as possible, in order to capture cross-column dependencies etc.
64- * The remaining clauses are then estimated using regular statistics tracked
65- * for individual columns. This is done by simply passing the clauses to
66- * clauselist_selectivity_simple.
67- */
68- Selectivity
69- clauselist_selectivity (PlannerInfo * root ,
70- List * clauses ,
71- int varRelid ,
72- JoinType jointype ,
73- SpecialJoinInfo * sjinfo )
74- {
75- Selectivity s1 = 1.0 ;
76- RelOptInfo * rel ;
77- Bitmapset * estimatedclauses = NULL ;
78-
79- /*
80- * Determine if these clauses reference a single relation. If so, and if
81- * it has extended statistics, try to apply those.
82- */
83- rel = find_single_rel_for_clauses (root , clauses );
84- if (rel && rel -> rtekind == RTE_RELATION && rel -> statlist != NIL )
85- {
86- /*
87- * Estimate as many clauses as possible using extended statistics.
88- *
89- * 'estimatedclauses' tracks the 0-based list position index of
90- * clauses that we've estimated using extended statistics, and that
91- * should be ignored.
92- */
93- s1 *= statext_clauselist_selectivity (root , clauses , varRelid ,
94- jointype , sjinfo , rel ,
95- & estimatedclauses );
96- }
97-
98- /*
99- * Apply normal selectivity estimates for the remaining clauses, passing
100- * 'estimatedclauses' so that it skips already estimated ones.
101- */
102- return s1 * clauselist_selectivity_simple (root , clauses , varRelid ,
103- jointype , sjinfo ,
104- estimatedclauses );
105- }
106-
107- /*
108- * clauselist_selectivity_simple -
109- * Compute the selectivity of an implicitly-ANDed list of boolean
110- * expression clauses. The list can be empty, in which case 1.0
111- * must be returned. List elements may be either RestrictInfos
112- * or bare expression clauses --- the former is preferred since
113- * it allows caching of results. The estimatedclauses bitmap tracks
114- * clauses that have already been estimated by other means.
115- *
116- * See clause_selectivity() for the meaning of the additional parameters.
117- *
118- * Our basic approach is to take the product of the selectivities of the
119- * subclauses. However, that's only right if the subclauses have independent
120- * probabilities, and in reality they are often NOT independent. So,
121- * we want to be smarter where we can.
70+ * The remaining clauses are then estimated by taking the product of their
71+ * selectivities, but that's only right if they have independent
72+ * probabilities, and in reality they are often NOT independent even if they
73+ * only refer to a single column. So, we want to be smarter where we can.
12274 *
12375 * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
12476 * are recognized as possible range query components if they are restriction
@@ -147,28 +99,68 @@ clauselist_selectivity(PlannerInfo *root,
14799 * selectivity functions; perhaps some day we can generalize the approach.
148100 */
149101Selectivity
150- clauselist_selectivity_simple (PlannerInfo * root ,
151- List * clauses ,
152- int varRelid ,
153- JoinType jointype ,
154- SpecialJoinInfo * sjinfo ,
155- Bitmapset * estimatedclauses )
102+ clauselist_selectivity (PlannerInfo * root ,
103+ List * clauses ,
104+ int varRelid ,
105+ JoinType jointype ,
106+ SpecialJoinInfo * sjinfo )
107+ {
108+ return clauselist_selectivity_ext (root , clauses , varRelid ,
109+ jointype , sjinfo , true);
110+ }
111+
112+ /*
113+ * clauselist_selectivity_ext -
114+ * Extended version of clauselist_selectivity(). If "use_extended_stats"
115+ * is false, all extended statistics will be ignored, and only per-column
116+ * statistics will be used.
117+ */
118+ Selectivity
119+ clauselist_selectivity_ext (PlannerInfo * root ,
120+ List * clauses ,
121+ int varRelid ,
122+ JoinType jointype ,
123+ SpecialJoinInfo * sjinfo ,
124+ bool use_extended_stats )
156125{
157126 Selectivity s1 = 1.0 ;
127+ RelOptInfo * rel ;
128+ Bitmapset * estimatedclauses = NULL ;
158129 RangeQueryClause * rqlist = NULL ;
159130 ListCell * l ;
160131 int listidx ;
161132
162133 /*
163- * If there's exactly one clause (and it was not estimated yet), just go
164- * directly to clause_selectivity(). None of what we might do below is
165- * relevant.
134+ * If there's exactly one clause, just go directly to
135+ * clause_selectivity_ext(). None of what we might do below is relevant.
166136 */
167- if (list_length (clauses ) == 1 && bms_is_empty (estimatedclauses ))
168- return clause_selectivity (root , (Node * ) linitial (clauses ),
169- varRelid , jointype , sjinfo );
137+ if (list_length (clauses ) == 1 )
138+ return clause_selectivity_ext (root , (Node * ) linitial (clauses ),
139+ varRelid , jointype , sjinfo ,
140+ use_extended_stats );
141+
142+ /*
143+ * Determine if these clauses reference a single relation. If so, and if
144+ * it has extended statistics, try to apply those.
145+ */
146+ rel = find_single_rel_for_clauses (root , clauses );
147+ if (use_extended_stats && rel && rel -> rtekind == RTE_RELATION && rel -> statlist != NIL )
148+ {
149+ /*
150+ * Estimate as many clauses as possible using extended statistics.
151+ *
152+ * 'estimatedclauses' is populated with the 0-based list position
153+ * index of clauses estimated here, and that should be ignored below.
154+ */
155+ s1 = statext_clauselist_selectivity (root , clauses , varRelid ,
156+ jointype , sjinfo , rel ,
157+ & estimatedclauses , false);
158+ }
170159
171160 /*
161+ * Apply normal selectivity estimates for remaining clauses. We'll be
162+ * careful to skip any clauses which were already estimated above.
163+ *
172164 * Anything that doesn't look like a potential rangequery clause gets
173165 * multiplied into s1 and forgotten. Anything that does gets inserted into
174166 * an rqlist entry.
@@ -189,8 +181,9 @@ clauselist_selectivity_simple(PlannerInfo *root,
189181 if (bms_is_member (listidx , estimatedclauses ))
190182 continue ;
191183
192- /* Always compute the selectivity using clause_selectivity */
193- s2 = clause_selectivity (root , clause , varRelid , jointype , sjinfo );
184+ /* Compute the selectivity of this clause in isolation */
185+ s2 = clause_selectivity_ext (root , clause , varRelid , jointype , sjinfo ,
186+ use_extended_stats );
194187
195188 /*
196189 * Check for being passed a RestrictInfo.
@@ -350,6 +343,83 @@ clauselist_selectivity_simple(PlannerInfo *root,
350343 return s1 ;
351344}
352345
346+ /*
347+ * clauselist_selectivity_or -
348+ * Compute the selectivity of an implicitly-ORed list of boolean
349+ * expression clauses. The list can be empty, in which case 0.0
350+ * must be returned. List elements may be either RestrictInfos
351+ * or bare expression clauses --- the former is preferred since
352+ * it allows caching of results.
353+ *
354+ * See clause_selectivity() for the meaning of the additional parameters.
355+ *
356+ * The basic approach is to apply extended statistics first, on as many
357+ * clauses as possible, in order to capture cross-column dependencies etc.
358+ * The remaining clauses are then estimated as if they were independent.
359+ */
360+ static Selectivity
361+ clauselist_selectivity_or (PlannerInfo * root ,
362+ List * clauses ,
363+ int varRelid ,
364+ JoinType jointype ,
365+ SpecialJoinInfo * sjinfo ,
366+ bool use_extended_stats )
367+ {
368+ Selectivity s1 = 0.0 ;
369+ RelOptInfo * rel ;
370+ Bitmapset * estimatedclauses = NULL ;
371+ ListCell * lc ;
372+ int listidx ;
373+
374+ /*
375+ * Determine if these clauses reference a single relation. If so, and if
376+ * it has extended statistics, try to apply those.
377+ */
378+ rel = find_single_rel_for_clauses (root , clauses );
379+ if (use_extended_stats && rel && rel -> rtekind == RTE_RELATION && rel -> statlist != NIL )
380+ {
381+ /*
382+ * Estimate as many clauses as possible using extended statistics.
383+ *
384+ * 'estimatedclauses' is populated with the 0-based list position
385+ * index of clauses estimated here, and that should be ignored below.
386+ */
387+ s1 = statext_clauselist_selectivity (root , clauses , varRelid ,
388+ jointype , sjinfo , rel ,
389+ & estimatedclauses , true);
390+ }
391+
392+ /*
393+ * Estimate the remaining clauses as if they were independent.
394+ *
395+ * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
396+ * for the probable overlap of selected tuple sets.
397+ *
398+ * XXX is this too conservative?
399+ */
400+ listidx = -1 ;
401+ foreach (lc , clauses )
402+ {
403+ Selectivity s2 ;
404+
405+ listidx ++ ;
406+
407+ /*
408+ * Skip this clause if it's already been estimated by some other
409+ * statistics above.
410+ */
411+ if (bms_is_member (listidx , estimatedclauses ))
412+ continue ;
413+
414+ s2 = clause_selectivity_ext (root , (Node * ) lfirst (lc ), varRelid ,
415+ jointype , sjinfo , use_extended_stats );
416+
417+ s1 = s1 + s2 - s1 * s2 ;
418+ }
419+
420+ return s1 ;
421+ }
422+
353423/*
354424 * addRangeClause --- add a new range clause for clauselist_selectivity
355425 *
@@ -601,6 +671,24 @@ clause_selectivity(PlannerInfo *root,
601671 int varRelid ,
602672 JoinType jointype ,
603673 SpecialJoinInfo * sjinfo )
674+ {
675+ return clause_selectivity_ext (root , clause , varRelid ,
676+ jointype , sjinfo , true);
677+ }
678+
679+ /*
680+ * clause_selectivity_ext -
681+ * Extended version of clause_selectivity(). If "use_extended_stats" is
682+ * false, all extended statistics will be ignored, and only per-column
683+ * statistics will be used.
684+ */
685+ Selectivity
686+ clause_selectivity_ext (PlannerInfo * root ,
687+ Node * clause ,
688+ int varRelid ,
689+ JoinType jointype ,
690+ SpecialJoinInfo * sjinfo ,
691+ bool use_extended_stats )
604692{
605693 Selectivity s1 = 0.5 ; /* default for any unhandled clause type */
606694 RestrictInfo * rinfo = NULL ;
@@ -716,42 +804,35 @@ clause_selectivity(PlannerInfo *root,
716804 else if (is_notclause (clause ))
717805 {
718806 /* inverse of the selectivity of the underlying clause */
719- s1 = 1.0 - clause_selectivity (root ,
720- (Node * ) get_notclausearg ((Expr * ) clause ),
721- varRelid ,
722- jointype ,
723- sjinfo );
807+ s1 = 1.0 - clause_selectivity_ext (root ,
808+ (Node * ) get_notclausearg ((Expr * ) clause ),
809+ varRelid ,
810+ jointype ,
811+ sjinfo ,
812+ use_extended_stats );
724813 }
725814 else if (is_andclause (clause ))
726815 {
727816 /* share code with clauselist_selectivity() */
728- s1 = clauselist_selectivity (root ,
729- ((BoolExpr * ) clause )-> args ,
730- varRelid ,
731- jointype ,
732- sjinfo );
817+ s1 = clauselist_selectivity_ext (root ,
818+ ((BoolExpr * ) clause )-> args ,
819+ varRelid ,
820+ jointype ,
821+ sjinfo ,
822+ use_extended_stats );
733823 }
734824 else if (is_orclause (clause ))
735825 {
736826 /*
737- * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to
738- * account for the probable overlap of selected tuple sets.
739- *
740- * XXX is this too conservative?
827+ * Almost the same thing as clauselist_selectivity, but with the
828+ * clauses connected by OR.
741829 */
742- ListCell * arg ;
743-
744- s1 = 0.0 ;
745- foreach (arg , ((BoolExpr * ) clause )-> args )
746- {
747- Selectivity s2 = clause_selectivity (root ,
748- (Node * ) lfirst (arg ),
749- varRelid ,
750- jointype ,
751- sjinfo );
752-
753- s1 = s1 + s2 - s1 * s2 ;
754- }
830+ s1 = clauselist_selectivity_or (root ,
831+ ((BoolExpr * ) clause )-> args ,
832+ varRelid ,
833+ jointype ,
834+ sjinfo ,
835+ use_extended_stats );
755836 }
756837 else if (is_opclause (clause ) || IsA (clause , DistinctExpr ))
757838 {
@@ -852,20 +933,22 @@ clause_selectivity(PlannerInfo *root,
852933 else if (IsA (clause , RelabelType ))
853934 {
854935 /* Not sure this case is needed, but it can't hurt */
855- s1 = clause_selectivity (root ,
856- (Node * ) ((RelabelType * ) clause )-> arg ,
857- varRelid ,
858- jointype ,
859- sjinfo );
936+ s1 = clause_selectivity_ext (root ,
937+ (Node * ) ((RelabelType * ) clause )-> arg ,
938+ varRelid ,
939+ jointype ,
940+ sjinfo ,
941+ use_extended_stats );
860942 }
861943 else if (IsA (clause , CoerceToDomain ))
862944 {
863945 /* Not sure this case is needed, but it can't hurt */
864- s1 = clause_selectivity (root ,
865- (Node * ) ((CoerceToDomain * ) clause )-> arg ,
866- varRelid ,
867- jointype ,
868- sjinfo );
946+ s1 = clause_selectivity_ext (root ,
947+ (Node * ) ((CoerceToDomain * ) clause )-> arg ,
948+ varRelid ,
949+ jointype ,
950+ sjinfo ,
951+ use_extended_stats );
869952 }
870953 else
871954 {
0 commit comments