66 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
77 * Portions Copyright (c) 1994, Regents of the University of California
88 *
9- * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.89 2009/07/16 20:55:44 tgl Exp $
9+ * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.90 2009/07/19 21:00:43 tgl Exp $
1010 *
1111 *-------------------------------------------------------------------------
1212 */
3232#include "utils/memutils.h"
3333
3434
35+ /* A "clump" of already-joined relations within gimme_tree; merge_clump keeps these in a list with larger clumps first */
36+ typedef struct
37+ {
38+ RelOptInfo * joinrel ; /* joinrel for the set of relations; an entry of initial_rels while size is 1 */
39+ int size ; /* number of input relations in clump; sizes are summed when two clumps merge */
40+ } Clump ;
41+
42+ static List * merge_clump (PlannerInfo * root , List * clumps , Clump * new_clump ,
43+ bool force );
3544static bool desirable_join (PlannerInfo * root ,
3645 RelOptInfo * outer_rel , RelOptInfo * inner_rel );
3746
@@ -51,20 +60,6 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
5160 int savelength ;
5261 struct HTAB * savehash ;
5362
54- /*
55- * Because gimme_tree considers both left- and right-sided trees, there is
56- * no difference between a tour (a,b,c,d,...) and a tour (b,a,c,d,...) ---
57- * the same join orders will be considered. To avoid redundant cost
58- * calculations, we simply reject tours where tour[0] > tour[1], assigning
59- * them an artificially bad fitness.
60- *
61- * init_tour() is aware of this rule and so we should never reject a tour
62- * during the initial filling of the pool. It seems difficult to persuade
63- * the recombination logic never to break the rule, however.
64- */
65- if (num_gene >= 2 && tour [0 ] > tour [1 ])
66- return DBL_MAX ;
67-
6863 /*
6964 * Create a private memory context that will hold all temp storage
7065 * allocated inside gimme_tree().
@@ -108,10 +103,7 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
108103 * XXX geqo does not currently support optimization for partial result
109104 * retrieval --- how to fix?
110105 */
111- if (joinrel )
112- fitness = joinrel -> cheapest_total_path -> total_cost ;
113- else
114- fitness = DBL_MAX ;
106+ fitness = joinrel -> cheapest_total_path -> total_cost ;
115107
116108 /*
117109 * Restore join_rel_list to its former state, and put back original
@@ -136,114 +128,174 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
136128 * 'tour' is the proposed join order, of length 'num_gene'
137129 *
138130 * Returns a new join relation whose cheapest path is the best plan for
139- * this join order. NB: will return NULL if join order is invalid.
131+ * this join order.
140132 *
141133 * The original implementation of this routine always joined in the specified
142134 * order, and so could only build left-sided plans (and right-sided and
143135 * mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
144136 * It could never produce a "bushy" plan. This had a couple of big problems,
145- * of which the worst was that as of 7.4, there are situations involving IN
146- * subqueries where the only valid plans are bushy.
137+ * of which the worst was that there are situations involving join order
138+ * restrictions where the only valid plans are bushy.
147139 *
148140 * The present implementation takes the given tour as a guideline, but
149- * postpones joins that seem unsuitable according to some heuristic rules.
150- * This allows correct bushy plans to be generated at need, and as a nice
151- * side-effect it seems to materially improve the quality of the generated
152- * plans.
141+ * postpones joins that are illegal or seem unsuitable according to some
142+ * heuristic rules. This allows correct bushy plans to be generated at need,
143+ * and as a nice side-effect it seems to materially improve the quality of the
144+ * generated plans.
153145 */
154146RelOptInfo *
155147gimme_tree (PlannerInfo * root , Gene * tour , int num_gene )
156148{
157149 GeqoPrivateData * private = (GeqoPrivateData * ) root -> join_search_private ;
158- RelOptInfo * * stack ;
159- int stack_depth ;
160- RelOptInfo * joinrel ;
150+ List * clumps ;
161151 int rel_count ;
162152
163153 /*
164- * Create a stack to hold not-yet-joined relations.
154+ * Sometimes, a relation can't yet be joined to others due to heuristics
155+ * or actual semantic restrictions. We maintain a list of "clumps" of
156+ * successfully joined relations, with larger clumps at the front.
157+ * Each new relation from the tour is added to the first clump it can
158+ * be joined to; if there is none then it becomes a new clump of its own.
159+ * When we enlarge an existing clump we check to see if it can now be
160+ * merged with any other clumps. After the tour is all scanned, we
161+ * forget about the heuristics and try to forcibly join any remaining
162+ * clumps. Some forced joins might still fail due to semantics, but
163+ * we should always be able to find some join order that works.
165164 */
166- stack = (RelOptInfo * * ) palloc (num_gene * sizeof (RelOptInfo * ));
167- stack_depth = 0 ;
165+ clumps = NIL ;
168166
169- /*
170- * Push each relation onto the stack in the specified order. After
171- * pushing each relation, see whether the top two stack entries are
172- * joinable according to the desirable_join() heuristics. If so, join
173- * them into one stack entry, and try again to combine with the next stack
174- * entry down (if any). When the stack top is no longer joinable,
175- * continue to the next input relation. After we have pushed the last
176- * input relation, the heuristics are disabled and we force joining all
177- * the remaining stack entries.
178- *
179- * If desirable_join() always returns true, this produces a straight
180- * left-to-right join just like the old code. Otherwise we may produce a
181- * bushy plan or a left/right-sided plan that really corresponds to some
182- * tour other than the one given. To the extent that the heuristics are
183- * helpful, however, this will be a better plan than the raw tour.
184- *
185- * Also, when a join attempt fails (because of OJ or IN constraints), we
186- * may be able to recover and produce a workable plan, where the old code
187- * just had to give up. This case acts the same as a false result from
188- * desirable_join().
189- */
190167 for (rel_count = 0 ; rel_count < num_gene ; rel_count ++ )
191168 {
192169 int cur_rel_index ;
170+ RelOptInfo * cur_rel ;
171+ Clump * cur_clump ;
193172
194- /* Get the next input relation and push it */
173+ /* Get the next input relation; the tour value minus one is its list_nth position in private->initial_rels */
195174 cur_rel_index = (int ) tour [rel_count ];
196- stack [stack_depth ] = (RelOptInfo * ) list_nth (private -> initial_rels ,
197- cur_rel_index - 1 );
198- stack_depth ++ ;
199-
200- /*
201- * While it's feasible, pop the top two stack entries and replace with
202- * their join.
203- */
204- while (stack_depth >= 2 )
175+ cur_rel = (RelOptInfo * ) list_nth (private -> initial_rels ,
176+ cur_rel_index - 1 );
177+
178+ /* Make it into a single-rel clump */
179+ cur_clump = (Clump * ) palloc (sizeof (Clump ));
180+ cur_clump -> joinrel = cur_rel ;
181+ cur_clump -> size = 1 ;
182+
183+ /* Merge it into the clumps list, using only desirable joins */
184+ clumps = merge_clump (root , clumps , cur_clump , false);
185+ }
186+
187+ if (list_length (clumps ) > 1 )
188+ {
189+ /* Force-join the remaining clumps in some legal order: rebuild the list by feeding each clump back through merge_clump with force = true */
190+ List * fclumps ;
191+ ListCell * lc ;
192+
193+ fclumps = NIL ;
194+ foreach (lc , clumps )
205195 {
206- RelOptInfo * outer_rel = stack [stack_depth - 2 ];
207- RelOptInfo * inner_rel = stack [stack_depth - 1 ];
196+ Clump * clump = (Clump * ) lfirst (lc );
208197
209- /*
210- * Don't pop if heuristics say not to join now. However, once we
211- * have exhausted the input, the heuristics can't prevent popping.
212- */
213- if (rel_count < num_gene - 1 &&
214- !desirable_join (root , outer_rel , inner_rel ))
215- break ;
198+ fclumps = merge_clump (root , fclumps , clump , true);
199+ }
200+ clumps = fclumps ;
201+ }
202+
203+ /* Did we succeed in forming a single join relation? If not, report an error instead of handing back NULL */
204+ if (list_length (clumps ) != 1 )
205+ elog (ERROR , "failed to join all relations together" );
206+
207+ return ((Clump * ) linitial (clumps ))-> joinrel ;
208+ }
209+
210+ /*
211+ * Merge a "clump" into the list of existing clumps for gimme_tree.
212+ *
213+ * We try to merge the clump into some existing clump, and repeat if
214+ * successful. When no more merging is possible, insert the clump
215+ * into the list, preserving the list ordering rule (namely, that
216+ * clumps of larger size appear earlier).
217+ *
218+ * If force is true, merge anywhere a join is legal, even if it causes
219+ * a cartesian join to be performed. When force is false, do only
220+ * "desirable" joins. Returns the updated clumps list; new_clump is either absorbed into an existing clump (and pfree'd) or becomes a member of the list.
221+ */
222+ static List *
223+ merge_clump (PlannerInfo * root , List * clumps , Clump * new_clump , bool force )
224+ {
225+ ListCell * prev ;
226+ ListCell * lc ;
227+
228+ /* Look for a clump that new_clump can join to; prev trails lc so the matching cell can be handed to list_delete_cell */
229+ prev = NULL ;
230+ foreach (lc , clumps )
231+ {
232+ Clump * old_clump = (Clump * ) lfirst (lc );
233+
234+ if (force ||
235+ desirable_join (root , old_clump -> joinrel , new_clump -> joinrel ))
236+ {
237+ RelOptInfo * joinrel ;
216238
217239 /*
218240 * Construct a RelOptInfo representing the join of these two input
219241 * relations. Note that we expect the joinrel not to exist in
220242 * root->join_rel_list yet, and so the paths constructed for it
221243 * will only include the ones we want.
222244 */
223- joinrel = make_join_rel (root , outer_rel , inner_rel );
224-
225- /* Can't pop stack here if join order is not valid */
226- if (!joinrel )
227- break ;
228-
229- /* Find and save the cheapest paths for this rel */
230- set_cheapest (joinrel );
231-
232- /* Pop the stack and replace the inputs with their join */
233- stack_depth -- ;
234- stack [stack_depth - 1 ] = joinrel ;
245+ joinrel = make_join_rel (root ,
246+ old_clump -> joinrel ,
247+ new_clump -> joinrel );
248+
249+ /* Keep searching if join order is not valid (make_join_rel gave back NULL) */
250+ if (joinrel )
251+ {
252+ /* Find and save the cheapest paths for this joinrel */
253+ set_cheapest (joinrel );
254+
255+ /* Absorb new clump into old */
256+ old_clump -> joinrel = joinrel ;
257+ old_clump -> size += new_clump -> size ;
258+ pfree (new_clump );
259+
260+ /* Remove old_clump from list; the recursive call below puts it back at the position its new size calls for */
261+ clumps = list_delete_cell (clumps , lc , prev );
262+
263+ /*
264+ * Recursively try to merge the enlarged old_clump with
265+ * others. When no further merge is possible, we'll reinsert
266+ * it into the list.
267+ */
268+ return merge_clump (root , clumps , old_clump , force );
269+ }
235270 }
271+ prev = lc ;
236272 }
237273
238- /* Did we succeed in forming a single join relation? */
239- if (stack_depth == 1 )
240- joinrel = stack [0 ];
241- else
242- joinrel = NULL ;
274+ /*
275+ * No merging is possible, so add new_clump as an independent clump, in
276+ * proper order according to size. We can be fast for the common case
277+ * where it has size 1 --- it should always go at the end.
278+ */
279+ if (clumps == NIL || new_clump -> size == 1 )
280+ return lappend (clumps , new_clump );
281+
282+ /* Check if it belongs at the front (strictly larger than the current first clump) */
283+ lc = list_head (clumps );
284+ if (new_clump -> size > ((Clump * ) lfirst (lc ))-> size )
285+ return lcons (new_clump , clumps );
243286
244- pfree (stack );
287+ /* Else search for the place to insert it; it goes after any clumps of equal size */
288+ for (;;)
289+ {
290+ ListCell * nxt = lnext (lc );
291+
292+ if (nxt == NULL || new_clump -> size > ((Clump * ) lfirst (nxt ))-> size )
293+ break ; /* it belongs after 'lc', before 'nxt' */
294+ lc = nxt ;
295+ }
296+ lappend_cell (clumps , lc , new_clump );
245297
246- return joinrel ;
298+ return clumps ;
247299}
248300
249301/*
0 commit comments