@@ -42,7 +42,7 @@ static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
4242 List * clause_list );
4343static Oid distinct_col_search (int colno , List * colnos , List * opids );
4444static bool is_innerrel_unique_for (PlannerInfo * root ,
45- RelOptInfo * outerrel ,
45+ Relids outerrelids ,
4646 RelOptInfo * innerrel ,
4747 JoinType jointype ,
4848 List * restrictlist );
@@ -495,6 +495,88 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
495495}
496496
497497
498+ /*
499+ * reduce_unique_semijoins
500+ * Check for semijoins that can be simplified to plain inner joins
501+ * because the inner relation is provably unique for the join clauses.
502+ *
503+ * Ideally this would happen during reduce_outer_joins, but we don't have
504+ * enough information at that point.
505+ *
506+ * To perform the strength reduction when applicable, we need only delete
507+ * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
508+ * bother fixing the join type attributed to it in the query jointree,
509+ * since that won't be consulted again.)
510+ */
511+ void
512+ reduce_unique_semijoins (PlannerInfo * root )
513+ {
514+ ListCell * lc ;
515+ ListCell * next ;
516+
517+ /*
518+ * Scan the join_info_list to find semijoins. We can't use foreach
519+ * because we may delete the current cell.
520+ */
521+ for (lc = list_head (root -> join_info_list ); lc != NULL ; lc = next )
522+ {
523+ SpecialJoinInfo * sjinfo = (SpecialJoinInfo * ) lfirst (lc );
524+ int innerrelid ;
525+ RelOptInfo * innerrel ;
526+ Relids joinrelids ;
527+ List * restrictlist ;
528+
529+ next = lnext (lc );
530+
531+ /*
532+ * Must be a non-delaying semijoin to a single baserel, else we aren't
533+ * going to be able to do anything with it. (It's probably not
534+ * possible for delay_upper_joins to be set on a semijoin, but we
535+ * might as well check.)
536+ */
537+ if (sjinfo -> jointype != JOIN_SEMI ||
538+ sjinfo -> delay_upper_joins )
539+ continue ;
540+
541+ if (!bms_get_singleton_member (sjinfo -> min_righthand , & innerrelid ))
542+ continue ;
543+
544+ innerrel = find_base_rel (root , innerrelid );
545+
546+ /*
547+ * Before we trouble to run generate_join_implied_equalities, make a
548+ * quick check to eliminate cases in which we will surely be unable to
549+ * prove uniqueness of the innerrel.
550+ */
551+ if (!rel_supports_distinctness (root , innerrel ))
552+ continue ;
553+
554+ /* Compute the relid set for the join we are considering */
555+ joinrelids = bms_union (sjinfo -> min_lefthand , sjinfo -> min_righthand );
556+
557+ /*
558+ * Since we're only considering a single-rel RHS, any join clauses it
559+ * has must be clauses linking it to the semijoin's min_lefthand. We
560+ * can also consider EC-derived join clauses.
561+ */
562+ restrictlist =
563+ list_concat (generate_join_implied_equalities (root ,
564+ joinrelids ,
565+ sjinfo -> min_lefthand ,
566+ innerrel ),
567+ innerrel -> joininfo );
568+
569+ /* Test whether the innerrel is unique for those clauses. */
570+ if (!innerrel_is_unique (root , sjinfo -> min_lefthand , innerrel ,
571+ JOIN_SEMI , restrictlist , true))
572+ continue ;
573+
574+ /* OK, remove the SpecialJoinInfo from the list. */
575+ root -> join_info_list = list_delete_ptr (root -> join_info_list , sjinfo );
576+ }
577+ }
578+
579+
498580/*
499581 * rel_supports_distinctness
500582 * Could the relation possibly be proven distinct on some set of columns?
@@ -857,6 +939,10 @@ distinct_col_search(int colno, List *colnos, List *opids)
857939 * Check if the innerrel provably contains at most one tuple matching any
858940 * tuple from the outerrel, based on join clauses in the 'restrictlist'.
859941 *
942+ * We need an actual RelOptInfo for the innerrel, but it's sufficient to
943+ * identify the outerrel by its Relids. This asymmetry supports use of this
944+ * function before joinrels have been built.
945+ *
860946 * The proof must be made based only on clauses that will be "joinquals"
861947 * rather than "otherquals" at execution. For an inner join there's no
862948 * difference; but if the join is outer, we must ignore pushed-down quals,
@@ -867,13 +953,18 @@ distinct_col_search(int colno, List *colnos, List *opids)
867953 *
868954 * The actual proof is undertaken by is_innerrel_unique_for(); this function
869955 * is a frontend that is mainly concerned with caching the answers.
956+ * In particular, the force_cache argument allows overriding the internal
957+ * heuristic about whether to cache negative answers; it should be "true"
958+ * if making an inquiry that is not part of the normal bottom-up join search
959+ * sequence.
870960 */
871961bool
872962innerrel_is_unique (PlannerInfo * root ,
873- RelOptInfo * outerrel ,
963+ Relids outerrelids ,
874964 RelOptInfo * innerrel ,
875965 JoinType jointype ,
876- List * restrictlist )
966+ List * restrictlist ,
967+ bool force_cache )
877968{
878969 MemoryContext old_context ;
879970 ListCell * lc ;
@@ -900,7 +991,7 @@ innerrel_is_unique(PlannerInfo *root,
900991 {
901992 Relids unique_for_rels = (Relids ) lfirst (lc );
902993
903- if (bms_is_subset (unique_for_rels , outerrel -> relids ))
994+ if (bms_is_subset (unique_for_rels , outerrelids ))
904995 return true; /* Success! */
905996 }
906997
@@ -912,12 +1003,12 @@ innerrel_is_unique(PlannerInfo *root,
9121003 {
9131004 Relids unique_for_rels = (Relids ) lfirst (lc );
9141005
915- if (bms_is_subset (outerrel -> relids , unique_for_rels ))
1006+ if (bms_is_subset (outerrelids , unique_for_rels ))
9161007 return false;
9171008 }
9181009
9191010 /* No cached information, so try to make the proof. */
920- if (is_innerrel_unique_for (root , outerrel , innerrel ,
1011+ if (is_innerrel_unique_for (root , outerrelids , innerrel ,
9211012 jointype , restrictlist ))
9221013 {
9231014 /*
@@ -932,7 +1023,7 @@ innerrel_is_unique(PlannerInfo *root,
9321023 */
9331024 old_context = MemoryContextSwitchTo (root -> planner_cxt );
9341025 innerrel -> unique_for_rels = lappend (innerrel -> unique_for_rels ,
935- bms_copy (outerrel -> relids ));
1026+ bms_copy (outerrelids ));
9361027 MemoryContextSwitchTo (old_context );
9371028
9381029 return true; /* Success! */
@@ -949,15 +1040,19 @@ innerrel_is_unique(PlannerInfo *root,
9491040 * from smaller to larger. It is useful in GEQO mode, where the
9501041 * knowledge can be carried across successive planning attempts; and
9511042 * it's likely to be useful when using join-search plugins, too. Hence
952- * cache only when join_search_private is non-NULL. (Yeah, that's a
953- * hack, but it seems reasonable.)
1043+ * cache when join_search_private is non-NULL. (Yeah, that's a hack,
1044+ * but it seems reasonable.)
1045+ *
1046+ * Also, allow callers to override that heuristic and force caching;
1047+ * that's useful for reduce_unique_semijoins, which calls here before
1048+ * the normal join search starts.
9541049 */
955- if (root -> join_search_private )
1050+ if (force_cache || root -> join_search_private )
9561051 {
9571052 old_context = MemoryContextSwitchTo (root -> planner_cxt );
9581053 innerrel -> non_unique_for_rels =
9591054 lappend (innerrel -> non_unique_for_rels ,
960- bms_copy (outerrel -> relids ));
1055+ bms_copy (outerrelids ));
9611056 MemoryContextSwitchTo (old_context );
9621057 }
9631058
@@ -972,7 +1067,7 @@ innerrel_is_unique(PlannerInfo *root,
9721067 */
9731068static bool
9741069is_innerrel_unique_for (PlannerInfo * root ,
975- RelOptInfo * outerrel ,
1070+ Relids outerrelids ,
9761071 RelOptInfo * innerrel ,
9771072 JoinType jointype ,
9781073 List * restrictlist )
@@ -1007,7 +1102,7 @@ is_innerrel_unique_for(PlannerInfo *root,
10071102 * Check if clause has the form "outer op inner" or "inner op outer",
10081103 * and if so mark which side is inner.
10091104 */
1010- if (!clause_sides_match_join (restrictinfo , outerrel -> relids ,
1105+ if (!clause_sides_match_join (restrictinfo , outerrelids ,
10111106 innerrel -> relids ))
10121107 continue ; /* no good for these input relations */
10131108
0 commit comments