@@ -37,6 +37,9 @@ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
3737static void remove_rel_from_query (PlannerInfo * root , int relid ,
3838 Relids joinrelids );
3939static List * remove_rel_from_joinlist (List * joinlist , int relid , int * nremoved );
40+ static bool rel_supports_distinctness (PlannerInfo * root , RelOptInfo * rel );
41+ static bool rel_is_distinct_for (PlannerInfo * root , RelOptInfo * rel ,
42+ List * clause_list );
4043static Oid distinct_col_search (int colno , List * colnos , List * opids );
4144
4245
@@ -152,7 +155,6 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
152155{
153156 int innerrelid ;
154157 RelOptInfo * innerrel ;
155- Query * subquery = NULL ;
156158 Relids joinrelids ;
157159 List * clause_list = NIL ;
158160 ListCell * l ;
@@ -171,38 +173,13 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
171173
172174 innerrel = find_base_rel (root , innerrelid );
173175
174- if (innerrel -> reloptkind != RELOPT_BASEREL )
175- return false;
176-
177176 /*
178177 * Before we go to the effort of checking whether any innerrel variables
179178 * are needed above the join, make a quick check to eliminate cases in
180179 * which we will surely be unable to prove uniqueness of the innerrel.
181180 */
182- if (innerrel -> rtekind == RTE_RELATION )
183- {
184- /*
185- * For a plain-relation innerrel, we only know how to prove uniqueness
186- * by reference to unique indexes. If there are no indexes then
187- * there's certainly no unique indexes so there's no point in going
188- * further.
189- */
190- if (innerrel -> indexlist == NIL )
191- return false;
192- }
193- else if (innerrel -> rtekind == RTE_SUBQUERY )
194- {
195- subquery = root -> simple_rte_array [innerrelid ]-> subquery ;
196-
197- /*
198- * If the subquery has no qualities that support distinctness proofs
199- * then there's no point in going further.
200- */
201- if (!query_supports_distinctness (subquery ))
202- return false;
203- }
204- else
205- return false; /* unsupported rtekind */
181+ if (!rel_supports_distinctness (root , innerrel ))
182+ return false;
206183
207184 /* Compute the relid set for the join we are considering */
208185 joinrelids = bms_union (sjinfo -> min_lefthand , sjinfo -> min_righthand );
@@ -291,7 +268,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
291268 continue ; /* not mergejoinable */
292269
293270 /*
294- * Check if clause has the form "outer op inner" or "inner op outer".
271+ * Check if clause has the form "outer op inner" or "inner op outer",
272+ * and if so mark which side is inner.
295273 */
296274 if (!clause_sides_match_join (restrictinfo , sjinfo -> min_lefthand ,
297275 innerrel -> relids ))
@@ -302,65 +280,11 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
302280 }
303281
304282 /*
305- * relation_has_unique_index_for automatically adds any usable restriction
306- * clauses for the innerrel, so we needn't do that here. (XXX we are not
307- * considering restriction clauses for subqueries; is that worth doing?)
283+ * Now that we have the relevant equality join clauses, try to prove the
284+ * innerrel distinct.
308285 */
309-
310- if (innerrel -> rtekind == RTE_RELATION )
311- {
312- /* Now examine the indexes to see if we have a matching unique index */
313- if (relation_has_unique_index_for (root , innerrel , clause_list , NIL , NIL ))
314- return true;
315- }
316- else /* innerrel->rtekind == RTE_SUBQUERY */
317- {
318- List * colnos = NIL ;
319- List * opids = NIL ;
320-
321- /*
322- * Build the argument lists for query_is_distinct_for: a list of
323- * output column numbers that the query needs to be distinct over, and
324- * a list of equality operators that the output columns need to be
325- * distinct according to.
326- */
327- foreach (l , clause_list )
328- {
329- RestrictInfo * rinfo = (RestrictInfo * ) lfirst (l );
330- Oid op ;
331- Var * var ;
332-
333- /*
334- * Get the equality operator we need uniqueness according to.
335- * (This might be a cross-type operator and thus not exactly the
336- * same operator the subquery would consider; that's all right
337- * since query_is_distinct_for can resolve such cases.) The
338- * mergejoinability test above should have selected only OpExprs.
339- */
340- Assert (IsA (rinfo -> clause , OpExpr ));
341- op = ((OpExpr * ) rinfo -> clause )-> opno ;
342-
343- /* clause_sides_match_join identified the inner side for us */
344- if (rinfo -> outer_is_left )
345- var = (Var * ) get_rightop (rinfo -> clause );
346- else
347- var = (Var * ) get_leftop (rinfo -> clause );
348-
349- /*
350- * If inner side isn't a Var referencing a subquery output column,
351- * this clause doesn't help us.
352- */
353- if (!var || !IsA (var , Var ) ||
354- var -> varno != innerrelid || var -> varlevelsup != 0 )
355- continue ;
356-
357- colnos = lappend_int (colnos , var -> varattno );
358- opids = lappend_oid (opids , op );
359- }
360-
361- if (query_is_distinct_for (subquery , colnos , opids ))
362- return true;
363- }
286+ if (rel_is_distinct_for (root , innerrel , clause_list ))
287+ return true;
364288
365289 /*
366290 * Some day it would be nice to check for other methods of establishing
@@ -561,6 +485,152 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
561485}
562486
563487
488+ /*
489+ * rel_supports_distinctness
490+ * Could the relation possibly be proven distinct on some set of columns?
491+ *
492+ * This is effectively a pre-checking function for rel_is_distinct_for().
493+ * It must return TRUE if rel_is_distinct_for() could possibly return TRUE
494+ * with this rel, but it should not expend a lot of cycles. The idea is
495+ * that callers can avoid doing possibly-expensive processing to compute
496+ * rel_is_distinct_for()'s argument lists if the call could not possibly
497+ * succeed.
498+ */
499+ static bool
500+ rel_supports_distinctness (PlannerInfo * root , RelOptInfo * rel )
501+ {
502+ /* We only know about baserels ... */
503+ if (rel -> reloptkind != RELOPT_BASEREL )
504+ return false;
505+ if (rel -> rtekind == RTE_RELATION )
506+ {
507+ /*
508+ * For a plain relation, we only know how to prove uniqueness by
509+ * reference to unique indexes. Make sure there's at least one
510+ * suitable unique index. It must be immediately enforced, and if
511+ * it's a partial index, it must match the query. (Keep these
512+ * conditions in sync with relation_has_unique_index_for!)
513+ */
514+ ListCell * lc ;
515+
516+ foreach (lc , rel -> indexlist )
517+ {
518+ IndexOptInfo * ind = (IndexOptInfo * ) lfirst (lc );
519+
520+ if (ind -> unique && ind -> immediate &&
521+ (ind -> indpred == NIL || ind -> predOK ))
522+ return true;
523+ }
524+ }
525+ else if (rel -> rtekind == RTE_SUBQUERY )
526+ {
527+ Query * subquery = root -> simple_rte_array [rel -> relid ]-> subquery ;
528+
529+ /* Check if the subquery has any qualities that support distinctness */
530+ if (query_supports_distinctness (subquery ))
531+ return true;
532+ }
533+ /* We have no proof rules for any other rtekinds. */
534+ return false;
535+ }
536+
537+ /*
538+ * rel_is_distinct_for
539+ * Does the relation return only distinct rows according to clause_list?
540+ *
541+ * clause_list is a list of join restriction clauses involving this rel and
542+ * some other one. Return true if no two rows emitted by this rel could
543+ * possibly join to the same row of the other rel.
544+ *
545+ * The caller must have already determined that each condition is a
546+ * mergejoinable equality with an expression in this relation on one side, and
547+ * an expression not involving this relation on the other. The transient
548+ * outer_is_left flag is used to identify which side references this relation:
549+ * left side if outer_is_left is false, right side if it is true.
550+ *
551+ * Note that the passed-in clause_list may be destructively modified! This
552+ * is OK for current uses, because the clause_list is built by the caller for
553+ * the sole purpose of passing to this function.
554+ */
555+ static bool
556+ rel_is_distinct_for (PlannerInfo * root , RelOptInfo * rel , List * clause_list )
557+ {
558+ /*
559+ * We could skip a couple of tests here if we assume all callers checked
560+ * rel_supports_distinctness first, but it doesn't seem worth taking any
561+ * risk for.
562+ */
563+ if (rel -> reloptkind != RELOPT_BASEREL )
564+ return false;
565+ if (rel -> rtekind == RTE_RELATION )
566+ {
567+ /*
568+ * Examine the indexes to see if we have a matching unique index.
569+ * relation_has_unique_index_for automatically adds any usable
570+ * restriction clauses for the rel, so we needn't do that here.
571+ */
572+ if (relation_has_unique_index_for (root , rel , clause_list , NIL , NIL ))
573+ return true;
574+ }
575+ else if (rel -> rtekind == RTE_SUBQUERY )
576+ {
577+ Index relid = rel -> relid ;
578+ Query * subquery = root -> simple_rte_array [relid ]-> subquery ;
579+ List * colnos = NIL ;
580+ List * opids = NIL ;
581+ ListCell * l ;
582+
583+ /*
584+ * Build the argument lists for query_is_distinct_for: a list of
585+ * output column numbers that the query needs to be distinct over, and
586+ * a list of equality operators that the output columns need to be
587+ * distinct according to.
588+ *
589+ * (XXX we are not considering restriction clauses attached to the
590+ * subquery; is that worth doing?)
591+ */
592+ foreach (l , clause_list )
593+ {
594+ RestrictInfo * rinfo = (RestrictInfo * ) lfirst (l );
595+ Oid op ;
596+ Var * var ;
597+
598+ /*
599+ * Get the equality operator we need uniqueness according to.
600+ * (This might be a cross-type operator and thus not exactly the
601+ * same operator the subquery would consider; that's all right
602+ * since query_is_distinct_for can resolve such cases.) The
603+ * caller's mergejoinability test should have selected only
604+ * OpExprs.
605+ */
606+ Assert (IsA (rinfo -> clause , OpExpr ));
607+ op = ((OpExpr * ) rinfo -> clause )-> opno ;
608+
609+ /* caller identified the inner side for us */
610+ if (rinfo -> outer_is_left )
611+ var = (Var * ) get_rightop (rinfo -> clause );
612+ else
613+ var = (Var * ) get_leftop (rinfo -> clause );
614+
615+ /*
616+ * If inner side isn't a Var referencing a subquery output column,
617+ * this clause doesn't help us.
618+ */
619+ if (!var || !IsA (var , Var ) ||
620+ var -> varno != relid || var -> varlevelsup != 0 )
621+ continue ;
622+
623+ colnos = lappend_int (colnos , var -> varattno );
624+ opids = lappend_oid (opids , op );
625+ }
626+
627+ if (query_is_distinct_for (subquery , colnos , opids ))
628+ return true;
629+ }
630+ return false;
631+ }
632+
633+
564634/*
565635 * query_supports_distinctness - could the query possibly be proven distinct
566636 * on some set of output columns?
0 commit comments