PostgreSQL Source Code git master
prepunion.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * prepunion.c
4 * Routines to plan set-operation queries. The filename is a leftover
5 * from a time when only UNIONs were implemented.
6 *
7 * There are two code paths in the planner for set-operation queries.
8 * If a subquery consists entirely of simple UNION ALL operations, it
9 * is converted into an "append relation". Otherwise, it is handled
10 * by the general code in this module (plan_set_operations and its
11 * subroutines). There is some support code here for the append-relation
12 * case, but most of the heavy lifting for that is done elsewhere,
13 * notably in prepjointree.c and allpaths.c.
14 *
15 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
16 * Portions Copyright (c) 1994, Regents of the University of California
17 *
18 *
19 * IDENTIFICATION
20 * src/backend/optimizer/prep/prepunion.c
21 *
22 *-------------------------------------------------------------------------
23 */
24#include "postgres.h"
25
26#include <math.h>
27
28#include "access/htup_details.h"
29#include "catalog/pg_type.h"
30#include "miscadmin.h"
31#include "nodes/makefuncs.h"
32#include "nodes/nodeFuncs.h"
33#include "optimizer/cost.h"
34#include "optimizer/pathnode.h"
35#include "optimizer/paths.h"
36#include "optimizer/planner.h"
37#include "optimizer/prep.h"
38#include "optimizer/tlist.h"
39#include "parser/parse_coerce.h"
40#include "utils/selfuncs.h"
41
42
44 SetOperationStmt *parentOp,
45 List *colTypes, List *colCollations,
46 List *refnames_tlist,
47 List **pTargetList,
48 bool *istrivial_tlist);
51 List *refnames_tlist,
52 List **pTargetList);
54 bool trivial_tlist, List *child_tlist,
55 List *interesting_pathkeys,
56 double *pNumGroups);
58 List *refnames_tlist,
59 List **pTargetList);
61 List *refnames_tlist,
62 List **pTargetList);
64 SetOperationStmt *top_union,
65 List *refnames_tlist,
66 List **tlist_list,
67 List **istrivial_tlist);
69static List *generate_setop_tlist(List *colTypes, List *colCollations,
70 Index varno,
71 bool hack_constants,
72 List *input_tlist,
73 List *refnames_tlist,
74 bool *trivial_tlist);
75static List *generate_append_tlist(List *colTypes, List *colCollations,
76 List *input_tlists,
77 List *refnames_tlist);
78static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
80 List *child_pathlist);
81
82
83/*
84 * plan_set_operations
85 *
86 * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
87 *
88 * This routine only deals with the setOperations tree of the given query.
89 * Any top-level ORDER BY requested in root->parse->sortClause will be handled
90 * when we return to grouping_planner; likewise for LIMIT.
91 *
92 * What we return is an "upperrel" RelOptInfo containing at least one Path
93 * that implements the set-operation tree. In addition, root->processed_tlist
94 * receives a targetlist representing the output of the topmost setop node.
95 */
98{
99 Query *parse = root->parse;
100 SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
101 Node *node;
102 RangeTblEntry *leftmostRTE;
103 Query *leftmostQuery;
104 RelOptInfo *setop_rel;
105 List *top_tlist;
106
107 Assert(topop);
108
109 /* check for unsupported stuff */
110 Assert(parse->jointree->fromlist == NIL);
111 Assert(parse->jointree->quals == NULL);
112 Assert(parse->groupClause == NIL);
113 Assert(parse->havingQual == NULL);
114 Assert(parse->windowClause == NIL);
115 Assert(parse->distinctClause == NIL);
116
117 /*
118 * In the outer query level, equivalence classes are limited to classes
119 * which define that the top-level target entry is equivalent to the
120 * corresponding child target entry. There won't be any equivalence class
121 * merging. Mark that merging is complete to allow us to make pathkeys.
122 */
123 Assert(root->eq_classes == NIL);
124 root->ec_merging_done = true;
125
126 /*
127 * We'll need to build RelOptInfos for each of the leaf subqueries, which
128 * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
129 * arrays for those, and for AppendRelInfos in case they're needed.
130 */
132
133 /*
134 * Find the leftmost component Query. We need to use its column names for
135 * all generated tlists (else SELECT INTO won't work right).
136 */
137 node = topop->larg;
138 while (node && IsA(node, SetOperationStmt))
139 node = ((SetOperationStmt *) node)->larg;
140 Assert(node && IsA(node, RangeTblRef));
141 leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
142 leftmostQuery = leftmostRTE->subquery;
143 Assert(leftmostQuery != NULL);
144
145 /*
146 * If the topmost node is a recursive union, it needs special processing.
147 */
148 if (root->hasRecursion)
149 {
150 setop_rel = generate_recursion_path(topop, root,
151 leftmostQuery->targetList,
152 &top_tlist);
153 }
154 else
155 {
156 bool trivial_tlist;
157
158 /*
159 * Recurse on setOperations tree to generate paths for set ops. The
160 * final output paths should have just the column types shown as the
161 * output from the top-level node.
162 */
163 setop_rel = recurse_set_operations((Node *) topop, root,
164 NULL, /* no parent */
165 topop->colTypes, topop->colCollations,
166 leftmostQuery->targetList,
167 &top_tlist,
168 &trivial_tlist);
169 }
170
171 /* Must return the built tlist into root->processed_tlist. */
172 root->processed_tlist = top_tlist;
173
174 return setop_rel;
175}
176
177/*
178 * recurse_set_operations
179 * Recursively handle one step in a tree of set operations
180 *
181 * setOp: current step (could be a SetOperationStmt or a leaf RangeTblRef)
182 * parentOp: parent step, or NULL if none (but see below)
183 * colTypes: OID list of set-op's result column datatypes
184 * colCollations: OID list of set-op's result column collations
185 * refnames_tlist: targetlist to take column names from
186 *
187 * parentOp should be passed as NULL unless that step is interested in
188 * getting sorted output from this step. ("Sorted" means "sorted according
189 * to the default btree opclasses of the result column datatypes".)
190 *
191 * Returns a RelOptInfo for the subtree, as well as these output parameters:
192 * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
193 * *istrivial_tlist: true if, and only if, datatypes between parent and child
194 * match.
195 *
196 * If setOp is a leaf node, this function plans the sub-query but does
197 * not populate the pathlist of the returned RelOptInfo. The caller will
198 * generate SubqueryScan paths using useful path(s) of the subquery (see
199 * build_setop_child_paths). But this function does build the paths for
200 * set-operation nodes.
201 *
202 * The pTargetList output parameter is mostly redundant with the pathtarget
203 * of the returned RelOptInfo, but for the moment we need it because much of
204 * the logic in this file depends on flag columns being marked resjunk.
205 * XXX Now that there are no flag columns and hence no resjunk columns, we
206 * could probably refactor this file to deal only in pathtargets.
207 *
208 * We don't have to care about typmods here: the only allowed difference
209 * between set-op input and output typmods is input is a specific typmod
210 * and output is -1, and that does not require a coercion.
211 */
212static RelOptInfo *
214 SetOperationStmt *parentOp,
215 List *colTypes, List *colCollations,
216 List *refnames_tlist,
217 List **pTargetList,
218 bool *istrivial_tlist)
219{
220 RelOptInfo *rel;
221
222 *istrivial_tlist = true; /* for now */
223
224 /* Guard against stack overflow due to overly complex setop nests */
226
227 if (IsA(setOp, RangeTblRef))
228 {
229 RangeTblRef *rtr = (RangeTblRef *) setOp;
230 RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
231 Query *subquery = rte->subquery;
232 PlannerInfo *subroot;
233 List *tlist;
234 bool trivial_tlist;
235 char *plan_name;
236
237 Assert(subquery != NULL);
238
239 /* Build a RelOptInfo for this leaf subquery. */
240 rel = build_simple_rel(root, rtr->rtindex, NULL);
241
242 /* plan_params should not be in use in current query level */
243 Assert(root->plan_params == NIL);
244
245 /*
246 * Generate a subroot and Paths for the subquery. If we have a
247 * parentOp, pass that down to encourage subquery_planner to consider
248 * suitably-sorted Paths.
249 */
250 plan_name = choose_plan_name(root->glob, "setop", true);
251 subroot = rel->subroot = subquery_planner(root->glob, subquery,
252 plan_name, root,
253 false, root->tuple_fraction,
254 parentOp);
255
256 /*
257 * It should not be possible for the primitive query to contain any
258 * cross-references to other primitive queries in the setop tree.
259 */
260 if (root->plan_params)
261 elog(ERROR, "unexpected outer reference in set operation subquery");
262
263 /* Figure out the appropriate target list for this subquery. */
264 tlist = generate_setop_tlist(colTypes, colCollations,
265 rtr->rtindex,
266 true,
267 subroot->processed_tlist,
268 refnames_tlist,
269 &trivial_tlist);
270 rel->reltarget = create_pathtarget(root, tlist);
271
272 /* Return the fully-fledged tlist to caller, too */
273 *pTargetList = tlist;
274 *istrivial_tlist = trivial_tlist;
275 }
276 else if (IsA(setOp, SetOperationStmt))
277 {
278 SetOperationStmt *op = (SetOperationStmt *) setOp;
279
280 /* UNIONs are much different from INTERSECT/EXCEPT */
281 if (op->op == SETOP_UNION)
282 rel = generate_union_paths(op, root,
283 refnames_tlist,
284 pTargetList);
285 else
287 refnames_tlist,
288 pTargetList);
289
290 /*
291 * If necessary, add a Result node to project the caller-requested
292 * output columns.
293 *
294 * XXX you don't really want to know about this: setrefs.c will apply
295 * fix_upper_expr() to the Result node's tlist. This would fail if the
296 * Vars generated by generate_setop_tlist() were not exactly equal()
297 * to the corresponding tlist entries of the subplan. However, since
298 * the subplan was generated by generate_union_paths() or
299 * generate_nonunion_paths(), and hence its tlist was generated by
300 * generate_append_tlist() or generate_setop_tlist(), this will work.
301 * We just tell generate_setop_tlist() to use varno 0.
302 */
303 if (!tlist_same_datatypes(*pTargetList, colTypes, false) ||
304 !tlist_same_collations(*pTargetList, colCollations, false))
305 {
306 PathTarget *target;
307 bool trivial_tlist;
308 ListCell *lc;
309
310 *pTargetList = generate_setop_tlist(colTypes, colCollations,
311 0,
312 false,
313 *pTargetList,
314 refnames_tlist,
315 &trivial_tlist);
316 *istrivial_tlist = trivial_tlist;
317 target = create_pathtarget(root, *pTargetList);
318
319 /* Apply projection to each path */
320 foreach(lc, rel->pathlist)
321 {
322 Path *subpath = (Path *) lfirst(lc);
323 Path *path;
324
325 Assert(subpath->param_info == NULL);
326 path = apply_projection_to_path(root, subpath->parent,
327 subpath, target);
328 /* If we had to add a Result, path is different from subpath */
329 if (path != subpath)
330 lfirst(lc) = path;
331 }
332
333 /* Apply projection to each partial path */
334 foreach(lc, rel->partial_pathlist)
335 {
336 Path *subpath = (Path *) lfirst(lc);
337 Path *path;
338
339 Assert(subpath->param_info == NULL);
340
341 /* avoid apply_projection_to_path, in case of multiple refs */
342 path = (Path *) create_projection_path(root, subpath->parent,
343 subpath, target);
344 lfirst(lc) = path;
345 }
346 }
348 }
349 else
350 {
351 elog(ERROR, "unrecognized node type: %d",
352 (int) nodeTag(setOp));
353 *pTargetList = NIL;
354 rel = NULL; /* keep compiler quiet */
355 }
356
357 return rel;
358}
359
360/*
361 * Generate paths for a recursive UNION node
362 */
363static RelOptInfo *
365 List *refnames_tlist,
366 List **pTargetList)
367{
368 RelOptInfo *result_rel;
369 Path *path;
370 RelOptInfo *lrel,
371 *rrel;
372 Path *lpath;
373 Path *rpath;
374 List *lpath_tlist;
375 bool lpath_trivial_tlist;
376 List *rpath_tlist;
377 bool rpath_trivial_tlist;
378 List *tlist;
379 List *groupList;
380 double dNumGroups;
381
382 /* Parser should have rejected other cases */
383 if (setOp->op != SETOP_UNION)
384 elog(ERROR, "only UNION queries can be recursive");
385 /* Worktable ID should be assigned */
386 Assert(root->wt_param_id >= 0);
387
388 /*
389 * Unlike a regular UNION node, process the left and right inputs
390 * separately without any intention of combining them into one Append.
391 */
392 lrel = recurse_set_operations(setOp->larg, root,
393 NULL, /* no value in sorted results */
394 setOp->colTypes, setOp->colCollations,
395 refnames_tlist,
396 &lpath_tlist,
397 &lpath_trivial_tlist);
398 if (lrel->rtekind == RTE_SUBQUERY)
399 build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
400 NIL, NULL);
401 lpath = lrel->cheapest_total_path;
402 /* The right path will want to look at the left one ... */
403 root->non_recursive_path = lpath;
404 rrel = recurse_set_operations(setOp->rarg, root,
405 NULL, /* no value in sorted results */
406 setOp->colTypes, setOp->colCollations,
407 refnames_tlist,
408 &rpath_tlist,
409 &rpath_trivial_tlist);
410 if (rrel->rtekind == RTE_SUBQUERY)
411 build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
412 NIL, NULL);
413 rpath = rrel->cheapest_total_path;
414 root->non_recursive_path = NULL;
415
416 /*
417 * Generate tlist for RecursiveUnion path node --- same as in Append cases
418 */
419 tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations,
420 list_make2(lpath_tlist, rpath_tlist),
421 refnames_tlist);
422
423 *pTargetList = tlist;
424
425 /* Build result relation. */
426 result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
427 bms_union(lrel->relids, rrel->relids));
428 result_rel->reltarget = create_pathtarget(root, tlist);
429
430 /*
431 * If UNION, identify the grouping operators
432 */
433 if (setOp->all)
434 {
435 groupList = NIL;
436 dNumGroups = 0;
437 }
438 else
439 {
440 /* Identify the grouping semantics */
441 groupList = generate_setop_grouplist(setOp, tlist);
442
443 /* We only support hashing here */
444 if (!grouping_is_hashable(groupList))
446 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
447 errmsg("could not implement recursive UNION"),
448 errdetail("All column datatypes must be hashable.")));
449
450 /*
451 * For the moment, take the number of distinct groups as equal to the
452 * total input size, ie, the worst case.
453 */
454 dNumGroups = lpath->rows + rpath->rows * 10;
455 }
456
457 /*
458 * And make the path node.
459 */
461 result_rel,
462 lpath,
463 rpath,
464 result_rel->reltarget,
465 groupList,
466 root->wt_param_id,
467 dNumGroups);
468
469 add_path(result_rel, path);
470 postprocess_setop_rel(root, result_rel);
471 return result_rel;
472}
473
474/*
475 * build_setop_child_paths
476 * Build paths for the set op child relation denoted by 'rel'.
477 *
478 * 'rel' is an RTE_SUBQUERY relation. We have already generated paths within
479 * the subquery's subroot; the task here is to create SubqueryScan paths for
480 * 'rel', representing scans of the useful subquery paths.
481 *
482 * interesting_pathkeys: if not NIL, also include paths that suit these
483 * pathkeys, sorting any unsorted paths as required.
484 * *pNumGroups: if not NULL, we estimate the number of distinct groups
485 * in the result, and store it there.
486 */
487static void
489 bool trivial_tlist, List *child_tlist,
490 List *interesting_pathkeys, double *pNumGroups)
491{
492 RelOptInfo *final_rel;
493 List *setop_pathkeys = rel->subroot->setop_pathkeys;
494 ListCell *lc;
495
496 /* it can't be a set op child rel if it's not a subquery */
497 Assert(rel->rtekind == RTE_SUBQUERY);
498
499 /* when sorting is needed, add child rel equivalences */
500 if (interesting_pathkeys != NIL)
502 rel,
503 child_tlist,
504 interesting_pathkeys);
505
506 /*
507 * Mark rel with estimated output rows, width, etc. Note that we have to
508 * do this before generating outer-query paths, else cost_subqueryscan is
509 * not happy.
510 */
512
513 /*
514 * Since we may want to add a partial path to this relation, we must set
515 * its consider_parallel flag correctly.
516 */
517 final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
518 rel->consider_parallel = final_rel->consider_parallel;
519
520 /* Generate subquery scan paths for any interesting path in final_rel */
521 foreach(lc, final_rel->pathlist)
522 {
523 Path *subpath = (Path *) lfirst(lc);
524 List *pathkeys;
525 Path *cheapest_input_path = final_rel->cheapest_total_path;
526 bool is_sorted;
527 int presorted_keys;
528
529 /* If the input rel is dummy, propagate that to this query level */
530 if (is_dummy_rel(final_rel))
531 {
532 mark_dummy_rel(rel);
533 continue;
534 }
535
536 /*
537 * Include the cheapest path as-is so that the set operation can be
538 * cheaply implemented using a method which does not require the input
539 * to be sorted.
540 */
541 if (subpath == cheapest_input_path)
542 {
543 /* Convert subpath's pathkeys to outer representation */
544 pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
546
547 /* Generate outer path using this subpath */
549 rel,
550 subpath,
551 trivial_tlist,
552 pathkeys,
553 NULL));
554 }
555
556 /* skip dealing with sorted paths if the setop doesn't need them */
557 if (interesting_pathkeys == NIL)
558 continue;
559
560 /*
561 * Create paths to suit final sort order required for setop_pathkeys.
562 * Here we'll sort the cheapest input path (if not sorted already) and
563 * incremental sort any paths which are partially sorted.
564 */
565 is_sorted = pathkeys_count_contained_in(setop_pathkeys,
566 subpath->pathkeys,
567 &presorted_keys);
568
569 if (!is_sorted)
570 {
571 double limittuples = rel->subroot->limit_tuples;
572
573 /*
574 * Try at least sorting the cheapest path and also try
575 * incrementally sorting any path which is partially sorted
576 * already (no need to deal with paths which have presorted keys
577 * when incremental sort is disabled unless it's the cheapest
578 * input path).
579 */
580 if (subpath != cheapest_input_path &&
581 (presorted_keys == 0 || !enable_incremental_sort))
582 continue;
583
584 /*
585 * We've no need to consider both a sort and incremental sort.
586 * We'll just do a sort if there are no presorted keys and an
587 * incremental sort when there are presorted keys.
588 */
589 if (presorted_keys == 0 || !enable_incremental_sort)
591 final_rel,
592 subpath,
593 setop_pathkeys,
594 limittuples);
595 else
597 final_rel,
598 subpath,
599 setop_pathkeys,
600 presorted_keys,
601 limittuples);
602 }
603
604 /*
605 * subpath is now sorted, so add it to the pathlist. We already added
606 * the cheapest_input_path above, so don't add it again unless we just
607 * sorted it.
608 */
609 if (subpath != cheapest_input_path)
610 {
611 /* Convert subpath's pathkeys to outer representation */
612 pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
614
615 /* Generate outer path using this subpath */
617 rel,
618 subpath,
619 trivial_tlist,
620 pathkeys,
621 NULL));
622 }
623 }
624
625 /* if consider_parallel is false, there should be no partial paths */
626 Assert(final_rel->consider_parallel ||
627 final_rel->partial_pathlist == NIL);
628
629 /*
630 * If we have a partial path for the child relation, we can use that to
631 * build a partial path for this relation. But there's no point in
632 * considering any path but the cheapest.
633 */
635 final_rel->partial_pathlist != NIL)
636 {
637 Path *partial_subpath;
638 Path *partial_path;
639
640 partial_subpath = linitial(final_rel->partial_pathlist);
641 partial_path = (Path *)
642 create_subqueryscan_path(root, rel, partial_subpath,
643 trivial_tlist,
644 NIL, NULL);
645 add_partial_path(rel, partial_path);
646 }
647
649
650 /*
651 * Estimate number of groups if caller wants it. If the subquery used
652 * grouping or aggregation, its output is probably mostly unique anyway;
653 * otherwise do statistical estimation.
654 *
655 * XXX you don't really want to know about this: we do the estimation
656 * using the subroot->parse's original targetlist expressions, not the
657 * subroot->processed_tlist which might seem more appropriate. The reason
658 * is that if the subquery is itself a setop, it may return a
659 * processed_tlist containing "varno 0" Vars generated by
660 * generate_append_tlist, and those would confuse estimate_num_groups
661 * mightily. We ought to get rid of the "varno 0" hack, but that requires
662 * a redesign of the parsetree representation of setops, so that there can
663 * be an RTE corresponding to each setop's output. Note, we use this not
664 * subquery's targetlist but subroot->parse's targetlist, because it was
665 * revised by self-join removal. subquery's targetlist might contain the
666 * references to the removed relids.
667 */
668 if (pNumGroups)
669 {
670 PlannerInfo *subroot = rel->subroot;
671 Query *subquery = subroot->parse;
672
673 if (subquery->groupClause || subquery->groupingSets ||
674 subquery->distinctClause || subroot->hasHavingQual ||
675 subquery->hasAggs)
676 *pNumGroups = rel->cheapest_total_path->rows;
677 else
678 *pNumGroups = estimate_num_groups(subroot,
679 get_tlist_exprs(subroot->parse->targetList, false),
681 NULL,
682 NULL);
683 }
684}
685
686/*
687 * Generate paths for a UNION or UNION ALL node
688 */
689static RelOptInfo *
691 List *refnames_tlist,
692 List **pTargetList)
693{
694 Relids relids = NULL;
695 RelOptInfo *result_rel;
696 ListCell *lc;
697 ListCell *lc2;
698 ListCell *lc3;
699 List *cheapest_pathlist = NIL;
700 List *ordered_pathlist = NIL;
701 List *partial_pathlist = NIL;
702 bool partial_paths_valid = true;
703 bool consider_parallel = true;
704 List *rellist;
705 List *tlist_list;
706 List *trivial_tlist_list;
707 List *tlist;
708 List *groupList = NIL;
709 Path *apath;
710 Path *gpath = NULL;
711 bool try_sorted = false;
712 List *union_pathkeys = NIL;
713
714 /*
715 * If any of my children are identical UNION nodes (same op, all-flag, and
716 * colTypes/colCollations) then they can be merged into this node so that
717 * we generate only one Append/MergeAppend and unique-ification for the
718 * lot. Recurse to find such nodes.
719 */
720 rellist = plan_union_children(root,
721 op,
722 refnames_tlist,
723 &tlist_list,
724 &trivial_tlist_list);
725
726 /*
727 * Generate tlist for Append/MergeAppend plan node.
728 *
729 * The tlist for an Append plan isn't important as far as the Append is
730 * concerned, but we must make it look real anyway for the benefit of the
731 * next plan level up.
732 */
733 tlist = generate_append_tlist(op->colTypes, op->colCollations,
734 tlist_list, refnames_tlist);
735 *pTargetList = tlist;
736
737 /* For UNIONs (not UNION ALL), try sorting, if sorting is possible */
738 if (!op->all)
739 {
740 /* Identify the grouping semantics */
741 groupList = generate_setop_grouplist(op, tlist);
742
743 if (grouping_is_sortable(op->groupClauses))
744 {
745 try_sorted = true;
746 /* Determine the pathkeys for sorting by the whole target list */
747 union_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
748 tlist);
749
750 root->query_pathkeys = union_pathkeys;
751 }
752 }
753
754 /*
755 * Now that we've got the append target list, we can build the union child
756 * paths.
757 */
758 forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
759 {
760 RelOptInfo *rel = lfirst(lc);
761 bool trivial_tlist = lfirst_int(lc2);
762 List *child_tlist = lfirst_node(List, lc3);
763
764 /* only build paths for the union children */
765 if (rel->rtekind == RTE_SUBQUERY)
766 build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
767 union_pathkeys, NULL);
768 }
769
770 /* Build path lists and relid set. */
771 foreach(lc, rellist)
772 {
773 RelOptInfo *rel = lfirst(lc);
774 Path *ordered_path;
775
776 /*
777 * Record the relids so that we can identify the correct
778 * UPPERREL_SETOP RelOptInfo below.
779 */
780 relids = bms_add_members(relids, rel->relids);
781
782 /* Skip any UNION children that are proven not to yield any rows */
783 if (is_dummy_rel(rel))
784 continue;
785
786 cheapest_pathlist = lappend(cheapest_pathlist,
788
789 if (try_sorted)
790 {
791 ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
792 union_pathkeys,
793 NULL,
795 false);
796
797 if (ordered_path != NULL)
798 ordered_pathlist = lappend(ordered_pathlist, ordered_path);
799 else
800 {
801 /*
802 * If we can't find a sorted path, just give up trying to
803 * generate a list of correctly sorted child paths. This can
804 * happen when type coercion was added to the targetlist due
805 * to mismatching types from the union children.
806 */
807 try_sorted = false;
808 }
809 }
810
811 if (consider_parallel)
812 {
813 if (!rel->consider_parallel)
814 {
815 consider_parallel = false;
816 partial_paths_valid = false;
817 }
818 else if (rel->partial_pathlist == NIL)
819 partial_paths_valid = false;
820 else
821 partial_pathlist = lappend(partial_pathlist,
823 }
824 }
825
826 /* Build result relation. */
827 result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
828 result_rel->reltarget = create_setop_pathtarget(root, tlist,
829 cheapest_pathlist);
830 result_rel->consider_parallel = consider_parallel;
831 result_rel->consider_startup = (root->tuple_fraction > 0);
832
833 /* If all UNION children were dummy rels, make the resulting rel dummy */
834 if (cheapest_pathlist == NIL)
835 {
836 mark_dummy_rel(result_rel);
837
838 return result_rel;
839 }
840
841 /*
842 * Append the child results together using the cheapest paths from each
843 * union child.
844 */
845 apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
846 NIL, NIL, NULL, 0, false, -1);
847
848 /*
849 * Estimate number of groups. For now we just assume the output is unique
850 * --- this is certainly true for the UNION case, and we want worst-case
851 * estimates anyway.
852 */
853 result_rel->rows = apath->rows;
854
855 /*
856 * Now consider doing the same thing using the partial paths plus Append
857 * plus Gather.
858 */
859 if (partial_paths_valid)
860 {
861 Path *papath;
862 int parallel_workers = 0;
863
864 /* Find the highest number of workers requested for any subpath. */
865 foreach(lc, partial_pathlist)
866 {
867 Path *subpath = lfirst(lc);
868
869 parallel_workers = Max(parallel_workers,
870 subpath->parallel_workers);
871 }
872 Assert(parallel_workers > 0);
873
874 /*
875 * If the use of parallel append is permitted, always request at least
876 * log2(# of children) paths. We assume it can be useful to have
877 * extra workers in this case because they will be spread out across
878 * the children. The precise formula is just a guess; see
879 * add_paths_to_append_rel.
880 */
882 {
883 parallel_workers = Max(parallel_workers,
884 pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
885 parallel_workers = Min(parallel_workers,
887 }
888 Assert(parallel_workers > 0);
889
890 papath = (Path *)
891 create_append_path(root, result_rel, NIL, partial_pathlist,
892 NIL, NULL, parallel_workers,
894 gpath = (Path *)
895 create_gather_path(root, result_rel, papath,
896 result_rel->reltarget, NULL, NULL);
897 }
898
899 if (!op->all)
900 {
901 double dNumGroups;
902 bool can_sort = grouping_is_sortable(groupList);
903 bool can_hash = grouping_is_hashable(groupList);
904 Path *first_path = linitial(cheapest_pathlist);
905
906 /*
907 * Estimate the number of UNION output rows. In the case when only a
908 * single UNION child remains, we can use estimate_num_groups() on
909 * that child. We must be careful not to do this when that child is
910 * the result of some other set operation as the targetlist will
911 * contain Vars with varno==0, which estimate_num_groups() wouldn't
912 * like.
913 */
914 if (list_length(cheapest_pathlist) == 1 &&
915 first_path->parent->reloptkind != RELOPT_UPPER_REL)
916 {
917 dNumGroups = estimate_num_groups(root,
918 first_path->pathtarget->exprs,
919 first_path->rows,
920 NULL,
921 NULL);
922 }
923 else
924 {
925 /*
926 * Otherwise, for the moment, take the number of distinct groups
927 * as equal to the total input size, i.e., the worst case. This
928 * is too conservative, but it's not clear how to get a decent
929 * estimate of the true size. One should note as well the
930 * propensity of novices to write UNION rather than UNION ALL even
931 * when they don't expect any duplicates...
932 */
933 dNumGroups = apath->rows;
934 }
935
936 if (can_hash)
937 {
938 Path *path;
939
940 /*
941 * Try a hash aggregate plan on 'apath'. This is the cheapest
942 * available path containing each append child.
943 */
944 path = (Path *) create_agg_path(root,
945 result_rel,
946 apath,
947 result_rel->reltarget,
950 groupList,
951 NIL,
952 NULL,
953 dNumGroups);
954 add_path(result_rel, path);
955
956 /* Try hash aggregate on the Gather path, if valid */
957 if (gpath != NULL)
958 {
959 /* Hashed aggregate plan --- no sort needed */
960 path = (Path *) create_agg_path(root,
961 result_rel,
962 gpath,
963 result_rel->reltarget,
966 groupList,
967 NIL,
968 NULL,
969 dNumGroups);
970 add_path(result_rel, path);
971 }
972 }
973
974 if (can_sort)
975 {
976 Path *path = apath;
977
978 /* Try Sort -> Unique on the Append path */
979 if (groupList != NIL)
980 path = (Path *) create_sort_path(root, result_rel, path,
981 make_pathkeys_for_sortclauses(root, groupList, tlist),
982 -1.0);
983
984 path = (Path *) create_unique_path(root,
985 result_rel,
986 path,
987 list_length(path->pathkeys),
988 dNumGroups);
989
990 add_path(result_rel, path);
991
992 /* Try Sort -> Unique on the Gather path, if set */
993 if (gpath != NULL)
994 {
995 path = gpath;
996
997 path = (Path *) create_sort_path(root, result_rel, path,
998 make_pathkeys_for_sortclauses(root, groupList, tlist),
999 -1.0);
1000
1001 path = (Path *) create_unique_path(root,
1002 result_rel,
1003 path,
1004 list_length(path->pathkeys),
1005 dNumGroups);
1006 add_path(result_rel, path);
1007 }
1008 }
1009
1010 /*
1011 * Try making a MergeAppend path if we managed to find a path with the
1012 * correct pathkeys in each union child query.
1013 */
1014 if (try_sorted && groupList != NIL)
1015 {
1016 Path *path;
1017
1019 result_rel,
1020 ordered_pathlist,
1021 union_pathkeys,
1022 NULL);
1023
1024 /* and make the MergeAppend unique */
1025 path = (Path *) create_unique_path(root,
1026 result_rel,
1027 path,
1028 list_length(tlist),
1029 dNumGroups);
1030
1031 add_path(result_rel, path);
1032 }
1033 }
1034 else
1035 {
1036 /* UNION ALL */
1037 add_path(result_rel, apath);
1038
1039 if (gpath != NULL)
1040 add_path(result_rel, gpath);
1041 }
1042
1043 return result_rel;
1044}
1045
1046/*
1047 * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
1048 */
1049static RelOptInfo *
1051 List *refnames_tlist,
1052 List **pTargetList)
1053{
1054 RelOptInfo *result_rel;
1055 RelOptInfo *lrel,
1056 *rrel;
1057 double save_fraction = root->tuple_fraction;
1058 Path *lpath,
1059 *rpath,
1060 *path;
1061 List *lpath_tlist,
1062 *rpath_tlist,
1063 *tlist,
1064 *groupList;
1065 bool lpath_trivial_tlist,
1066 rpath_trivial_tlist,
1067 result_trivial_tlist;
1068 List *nonunion_pathkeys = NIL;
1069 double dLeftGroups,
1070 dRightGroups,
1071 dNumGroups,
1072 dNumOutputRows;
1073 bool can_sort;
1074 bool can_hash;
1075 SetOpCmd cmd;
1076
1077 /*
1078 * Tell children to fetch all tuples.
1079 */
1080 root->tuple_fraction = 0.0;
1081
1082 /* Recurse on children */
1083 lrel = recurse_set_operations(op->larg, root,
1084 op,
1085 op->colTypes, op->colCollations,
1086 refnames_tlist,
1087 &lpath_tlist,
1088 &lpath_trivial_tlist);
1089
1090 rrel = recurse_set_operations(op->rarg, root,
1091 op,
1092 op->colTypes, op->colCollations,
1093 refnames_tlist,
1094 &rpath_tlist,
1095 &rpath_trivial_tlist);
1096
1097 /*
1098 * Generate tlist for SetOp plan node.
1099 *
1100 * The tlist for a SetOp plan isn't important so far as the SetOp is
1101 * concerned, but we must make it look real anyway for the benefit of the
1102 * next plan level up.
1103 */
1104 tlist = generate_setop_tlist(op->colTypes, op->colCollations,
1105 0, false, lpath_tlist, refnames_tlist,
1106 &result_trivial_tlist);
1107
1108 /* We should not have needed any type coercions in the tlist */
1109 Assert(result_trivial_tlist);
1110
1111 *pTargetList = tlist;
1112
1113 /* Identify the grouping semantics */
1114 groupList = generate_setop_grouplist(op, tlist);
1115
1116 /* Check whether the operators support sorting or hashing */
1117 can_sort = grouping_is_sortable(groupList);
1118 can_hash = grouping_is_hashable(groupList);
1119 if (!can_sort && !can_hash)
1120 ereport(ERROR,
1121 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1122 /* translator: %s is INTERSECT or EXCEPT */
1123 errmsg("could not implement %s",
1124 (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT"),
1125 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1126
1127 if (can_sort)
1128 {
1129 /* Determine the pathkeys for sorting by the whole target list */
1130 nonunion_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
1131 tlist);
1132
1133 root->query_pathkeys = nonunion_pathkeys;
1134 }
1135
1136 /*
1137 * Now that we've got all that info, we can build the child paths.
1138 */
1139 if (lrel->rtekind == RTE_SUBQUERY)
1140 build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
1141 nonunion_pathkeys, &dLeftGroups);
1142 else
1143 dLeftGroups = lrel->rows;
1144 if (rrel->rtekind == RTE_SUBQUERY)
1145 build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
1146 nonunion_pathkeys, &dRightGroups);
1147 else
1148 dRightGroups = rrel->rows;
1149
1150 /* Undo effects of forcing tuple_fraction to 0 */
1151 root->tuple_fraction = save_fraction;
1152
1153 /*
1154 * For EXCEPT, we must put the left input first. For INTERSECT, either
1155 * order should give the same results, and we prefer to put the smaller
1156 * input first in order to (a) minimize the size of the hash table in the
1157 * hashing case, and (b) improve our chances of exploiting the executor's
1158 * fast path for empty left-hand input. "Smaller" means the one with the
1159 * fewer groups.
1160 */
1161 if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups)
1162 {
1163 /* need to swap the two inputs */
1164 RelOptInfo *tmprel;
1165 List *tmplist;
1166 double tmpd;
1167
1168 tmprel = lrel;
1169 lrel = rrel;
1170 rrel = tmprel;
1171 tmplist = lpath_tlist;
1172 lpath_tlist = rpath_tlist;
1173 rpath_tlist = tmplist;
1174 tmpd = dLeftGroups;
1175 dLeftGroups = dRightGroups;
1176 dRightGroups = tmpd;
1177 }
1178
1179 lpath = lrel->cheapest_total_path;
1180 rpath = rrel->cheapest_total_path;
1181
1182 /* Build result relation. */
1183 result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
1184 bms_union(lrel->relids, rrel->relids));
1185
1186 /*
1187 * Create the PathTarget and set the width accordingly. For EXCEPT, since
1188 * the set op result won't contain rows from the rpath, we only account
1189 * for the width of the lpath. For INTERSECT, use both input paths.
1190 */
1191 if (op->op == SETOP_EXCEPT)
1192 result_rel->reltarget = create_setop_pathtarget(root, tlist,
1193 list_make1(lpath));
1194 else
1195 result_rel->reltarget = create_setop_pathtarget(root, tlist,
1196 list_make2(lpath, rpath));
1197
1198 /* Check for provably empty setop inputs and add short-circuit paths. */
1199 if (op->op == SETOP_EXCEPT)
1200 {
1201 /*
1202 * For EXCEPTs, if the left side is dummy then there's no need to
1203 * inspect the right-hand side as scanning the right to find tuples to
1204 * remove won't make the left-hand input any more empty.
1205 */
1206 if (is_dummy_rel(lrel))
1207 {
1208 mark_dummy_rel(result_rel);
1209
1210 return result_rel;
1211 }
1212
1213 /* Handle EXCEPTs with dummy right input */
1214 if (is_dummy_rel(rrel))
1215 {
1216 if (op->all)
1217 {
1218 Path *apath;
1219
1220 /*
1221 * EXCEPT ALL: If the right-hand input is dummy then we can
1222 * simply scan the left-hand input. To keep createplan.c
1223 * happy, use a single child Append to handle the translation
1224 * between the set op targetlist and the targetlist of the
1225 * left input. The Append will be removed in setrefs.c.
1226 */
1227 apath = (Path *) create_append_path(root, result_rel, list_make1(lpath),
1228 NIL, NIL, NULL, 0, false, -1);
1229
1230 add_path(result_rel, apath);
1231
1232 return result_rel;
1233 }
1234 else
1235 {
1236 /*
1237 * To make EXCEPT with a dummy RHS work means having to
1238 * deduplicate the left input. That could be done with
1239 * AggPaths, but it doesn't seem worth the effort. Let the
1240 * normal path generation code below handle this one.
1241 */
1242 }
1243 }
1244 }
1245 else
1246 {
1247 /*
1248 * For INTERSECT, if either input is a dummy rel then we can mark the
1249 * result_rel as dummy since intersecting with an empty relation can
1250 * never yield any results. This is true regardless of INTERSECT or
1251 * INTERSECT ALL.
1252 */
1253 if (is_dummy_rel(lrel) || is_dummy_rel(rrel))
1254 {
1255 mark_dummy_rel(result_rel);
1256
1257 return result_rel;
1258 }
1259 }
1260
1261 /*
1262 * Estimate number of distinct groups that we'll need hashtable entries
1263 * for; this is the size of the left-hand input for EXCEPT, or the smaller
1264 * input for INTERSECT. Also estimate the number of eventual output rows.
1265 * In non-ALL cases, we estimate each group produces one output row; in
1266 * ALL cases use the relevant relation size. These are worst-case
1267 * estimates, of course, but we need to be conservative.
1268 */
1269 if (op->op == SETOP_EXCEPT)
1270 {
1271 dNumGroups = dLeftGroups;
1272 dNumOutputRows = op->all ? lpath->rows : dNumGroups;
1273 }
1274 else
1275 {
1276 dNumGroups = dLeftGroups;
1277 dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
1278 }
1279 result_rel->rows = dNumOutputRows;
1280
1281 /* Select the SetOpCmd type */
1282 switch (op->op)
1283 {
1284 case SETOP_INTERSECT:
1286 break;
1287 case SETOP_EXCEPT:
1289 break;
1290 default:
1291 elog(ERROR, "unrecognized set op: %d", (int) op->op);
1292 cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
1293 break;
1294 }
1295
1296 /*
1297 * If we can hash, that just requires a SetOp atop the cheapest inputs.
1298 */
1299 if (can_hash)
1300 {
1301 path = (Path *) create_setop_path(root,
1302 result_rel,
1303 lpath,
1304 rpath,
1305 cmd,
1307 groupList,
1308 dNumGroups,
1309 dNumOutputRows);
1310 add_path(result_rel, path);
1311 }
1312
1313 /*
1314 * If we can sort, generate the cheapest sorted input paths, and add a
1315 * SetOp atop those.
1316 */
1317 if (can_sort)
1318 {
1319 List *pathkeys;
1320 Path *slpath,
1321 *srpath;
1322
1323 /* First the left input ... */
1325 groupList,
1326 lpath_tlist);
1327 if (pathkeys_contained_in(pathkeys, lpath->pathkeys))
1328 slpath = lpath; /* cheapest path is already sorted */
1329 else
1330 {
1332 nonunion_pathkeys,
1333 NULL,
1334 TOTAL_COST,
1335 false);
1336 /* Subquery failed to produce any presorted paths? */
1337 if (slpath == NULL)
1338 slpath = (Path *) create_sort_path(root,
1339 lpath->parent,
1340 lpath,
1341 pathkeys,
1342 -1.0);
1343 }
1344
1345 /* and now the same for the right. */
1347 groupList,
1348 rpath_tlist);
1349 if (pathkeys_contained_in(pathkeys, rpath->pathkeys))
1350 srpath = rpath; /* cheapest path is already sorted */
1351 else
1352 {
1354 nonunion_pathkeys,
1355 NULL,
1356 TOTAL_COST,
1357 false);
1358 /* Subquery failed to produce any presorted paths? */
1359 if (srpath == NULL)
1360 srpath = (Path *) create_sort_path(root,
1361 rpath->parent,
1362 rpath,
1363 pathkeys,
1364 -1.0);
1365 }
1366
1367 path = (Path *) create_setop_path(root,
1368 result_rel,
1369 slpath,
1370 srpath,
1371 cmd,
1373 groupList,
1374 dNumGroups,
1375 dNumOutputRows);
1376 add_path(result_rel, path);
1377 }
1378
1379 return result_rel;
1380}
1381
1382/*
1383 * Pull up children of a UNION node that are identically-propertied UNIONs,
1384 * and perform planning of the queries underneath the N-way UNION.
1385 *
1386 * The result is a list of RelOptInfos containing Paths for sub-nodes, with
1387 * one entry for each descendant that is a leaf query or non-identical setop.
1388 * We also return parallel lists of the childrens' targetlists and
1389 * is-trivial-tlist flags.
1390 *
1391 * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
1392 * output rows will be lost anyway.
1393 */
1394static List *
1396 SetOperationStmt *top_union,
1397 List *refnames_tlist,
1398 List **tlist_list,
1399 List **istrivial_tlist)
1400{
1401 List *pending_rels = list_make1(top_union);
1402 List *result = NIL;
1403 List *child_tlist;
1404 bool trivial_tlist;
1405
1406 *tlist_list = NIL;
1407 *istrivial_tlist = NIL;
1408
1409 while (pending_rels != NIL)
1410 {
1411 Node *setOp = linitial(pending_rels);
1412
1413 pending_rels = list_delete_first(pending_rels);
1414
1415 if (IsA(setOp, SetOperationStmt))
1416 {
1417 SetOperationStmt *op = (SetOperationStmt *) setOp;
1418
1419 if (op->op == top_union->op &&
1420 (op->all == top_union->all || op->all) &&
1421 equal(op->colTypes, top_union->colTypes) &&
1422 equal(op->colCollations, top_union->colCollations))
1423 {
1424 /* Same UNION, so fold children into parent */
1425 pending_rels = lcons(op->rarg, pending_rels);
1426 pending_rels = lcons(op->larg, pending_rels);
1427 continue;
1428 }
1429 }
1430
1431 /*
1432 * Not same, so plan this child separately.
1433 *
1434 * If top_union isn't a UNION ALL, then we are interested in sorted
1435 * output from the child, so pass top_union as parentOp. Note that
1436 * this isn't necessarily the child node's immediate SetOperationStmt
1437 * parent, but that's fine: it's the effective parent.
1438 */
1439 result = lappend(result, recurse_set_operations(setOp, root,
1440 top_union->all ? NULL : top_union,
1441 top_union->colTypes,
1442 top_union->colCollations,
1443 refnames_tlist,
1444 &child_tlist,
1445 &trivial_tlist));
1446 *tlist_list = lappend(*tlist_list, child_tlist);
1447 *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
1448 }
1449
1450 return result;
1451}
1452
1453/*
1454 * postprocess_setop_rel - perform steps required after adding paths
1455 */
1456static void
1458{
1459 /*
1460 * We don't currently worry about allowing FDWs to contribute paths to
1461 * this relation, but give extensions a chance.
1462 */
1464 (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1465 NULL, rel, NULL);
1466
1467 /* Select cheapest path */
1468 set_cheapest(rel);
1469}
1470
1471/*
1472 * Generate targetlist for a set-operation plan node
1473 *
1474 * colTypes: OID list of set-op's result column datatypes
1475 * colCollations: OID list of set-op's result column collations
1476 * varno: varno to use in generated Vars
1477 * hack_constants: true to copy up constants (see comments in code)
1478 * input_tlist: targetlist of this node's input node
1479 * refnames_tlist: targetlist to take column names from
1480 * trivial_tlist: output parameter, set to true if targetlist is trivial
1481 */
1482static List *
1483generate_setop_tlist(List *colTypes, List *colCollations,
1484 Index varno,
1485 bool hack_constants,
1486 List *input_tlist,
1487 List *refnames_tlist,
1488 bool *trivial_tlist)
1489{
1490 List *tlist = NIL;
1491 int resno = 1;
1492 ListCell *ctlc,
1493 *cclc,
1494 *itlc,
1495 *rtlc;
1496 TargetEntry *tle;
1497 Node *expr;
1498
1499 *trivial_tlist = true; /* until proven differently */
1500
1501 forfour(ctlc, colTypes, cclc, colCollations,
1502 itlc, input_tlist, rtlc, refnames_tlist)
1503 {
1504 Oid colType = lfirst_oid(ctlc);
1505 Oid colColl = lfirst_oid(cclc);
1506 TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1507 TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1508
1509 Assert(inputtle->resno == resno);
1510 Assert(reftle->resno == resno);
1511 Assert(!inputtle->resjunk);
1512 Assert(!reftle->resjunk);
1513
1514 /*
1515 * Generate columns referencing input columns and having appropriate
1516 * data types and column names. Insert datatype coercions where
1517 * necessary.
1518 *
1519 * HACK: constants in the input's targetlist are copied up as-is
1520 * rather than being referenced as subquery outputs. This is mainly
1521 * to ensure that when we try to coerce them to the output column's
1522 * datatype, the right things happen for UNKNOWN constants. But do
1523 * this only at the first level of subquery-scan plans; we don't want
1524 * phony constants appearing in the output tlists of upper-level
1525 * nodes!
1526 *
1527 * Note that copying a constant doesn't in itself require us to mark
1528 * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
1529 */
1530 if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1531 expr = (Node *) inputtle->expr;
1532 else
1533 expr = (Node *) makeVar(varno,
1534 inputtle->resno,
1535 exprType((Node *) inputtle->expr),
1536 exprTypmod((Node *) inputtle->expr),
1537 exprCollation((Node *) inputtle->expr),
1538 0);
1539
1540 if (exprType(expr) != colType)
1541 {
1542 /*
1543 * Note: it's not really cool to be applying coerce_to_common_type
1544 * here; one notable point is that assign_expr_collations never
1545 * gets run on any generated nodes. For the moment that's not a
1546 * problem because we force the correct exposed collation below.
1547 * It would likely be best to make the parser generate the correct
1548 * output tlist for every set-op to begin with, though.
1549 */
1550 expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1551 expr,
1552 colType,
1553 "UNION/INTERSECT/EXCEPT");
1554 *trivial_tlist = false; /* the coercion makes it not trivial */
1555 }
1556
1557 /*
1558 * Ensure the tlist entry's exposed collation matches the set-op. This
1559 * is necessary because plan_set_operations() reports the result
1560 * ordering as a list of SortGroupClauses, which don't carry collation
1561 * themselves but just refer to tlist entries. If we don't show the
1562 * right collation then planner.c might do the wrong thing in
1563 * higher-level queries.
1564 *
1565 * Note we use RelabelType, not CollateExpr, since this expression
1566 * will reach the executor without any further processing.
1567 */
1568 if (exprCollation(expr) != colColl)
1569 {
1570 expr = applyRelabelType(expr,
1571 exprType(expr), exprTypmod(expr), colColl,
1572 COERCE_IMPLICIT_CAST, -1, false);
1573 *trivial_tlist = false; /* the relabel makes it not trivial */
1574 }
1575
1576 tle = makeTargetEntry((Expr *) expr,
1577 (AttrNumber) resno++,
1578 pstrdup(reftle->resname),
1579 false);
1580
1581 /*
1582 * By convention, all output columns in a setop tree have
1583 * ressortgroupref equal to their resno. In some cases the ref isn't
1584 * needed, but this is a cleaner way than modifying the tlist later.
1585 */
1586 tle->ressortgroupref = tle->resno;
1587
1588 tlist = lappend(tlist, tle);
1589 }
1590
1591 return tlist;
1592}
1593
1594/*
1595 * Generate targetlist for a set-operation Append node
1596 *
1597 * colTypes: OID list of set-op's result column datatypes
1598 * colCollations: OID list of set-op's result column collations
1599 * input_tlists: list of tlists for sub-plans of the Append
1600 * refnames_tlist: targetlist to take column names from
1601 *
1602 * The entries in the Append's targetlist should always be simple Vars;
1603 * we just have to make sure they have the right datatypes/typmods/collations.
1604 * The Vars are always generated with varno 0.
1605 *
1606 * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1607 * cannot figure out a realistic width for the tlist we make here. But we
1608 * ought to refactor this code to produce a PathTarget directly, anyway.
1609 */
1610static List *
1611generate_append_tlist(List *colTypes, List *colCollations,
1612 List *input_tlists,
1613 List *refnames_tlist)
1614{
1615 List *tlist = NIL;
1616 int resno = 1;
1617 ListCell *curColType;
1618 ListCell *curColCollation;
1619 ListCell *ref_tl_item;
1620 int colindex;
1621 TargetEntry *tle;
1622 Node *expr;
1623 ListCell *tlistl;
1624 int32 *colTypmods;
1625
1626 /*
1627 * First extract typmods to use.
1628 *
1629 * If the inputs all agree on type and typmod of a particular column, use
1630 * that typmod; else use -1.
1631 */
1632 colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1633
1634 foreach(tlistl, input_tlists)
1635 {
1636 List *subtlist = (List *) lfirst(tlistl);
1637 ListCell *subtlistl;
1638
1639 curColType = list_head(colTypes);
1640 colindex = 0;
1641 foreach(subtlistl, subtlist)
1642 {
1643 TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1644
1645 Assert(!subtle->resjunk);
1646 Assert(curColType != NULL);
1647 if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1648 {
1649 /* If first subplan, copy the typmod; else compare */
1650 int32 subtypmod = exprTypmod((Node *) subtle->expr);
1651
1652 if (tlistl == list_head(input_tlists))
1653 colTypmods[colindex] = subtypmod;
1654 else if (subtypmod != colTypmods[colindex])
1655 colTypmods[colindex] = -1;
1656 }
1657 else
1658 {
1659 /* types disagree, so force typmod to -1 */
1660 colTypmods[colindex] = -1;
1661 }
1662 curColType = lnext(colTypes, curColType);
1663 colindex++;
1664 }
1665 Assert(curColType == NULL);
1666 }
1667
1668 /*
1669 * Now we can build the tlist for the Append.
1670 */
1671 colindex = 0;
1672 forthree(curColType, colTypes, curColCollation, colCollations,
1673 ref_tl_item, refnames_tlist)
1674 {
1675 Oid colType = lfirst_oid(curColType);
1676 int32 colTypmod = colTypmods[colindex++];
1677 Oid colColl = lfirst_oid(curColCollation);
1678 TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1679
1680 Assert(reftle->resno == resno);
1681 Assert(!reftle->resjunk);
1682 expr = (Node *) makeVar(0,
1683 resno,
1684 colType,
1685 colTypmod,
1686 colColl,
1687 0);
1688 tle = makeTargetEntry((Expr *) expr,
1689 (AttrNumber) resno++,
1690 pstrdup(reftle->resname),
1691 false);
1692
1693 /*
1694 * By convention, all output columns in a setop tree have
1695 * ressortgroupref equal to their resno. In some cases the ref isn't
1696 * needed, but this is a cleaner way than modifying the tlist later.
1697 */
1698 tle->ressortgroupref = tle->resno;
1699
1700 tlist = lappend(tlist, tle);
1701 }
1702
1703 pfree(colTypmods);
1704
1705 return tlist;
1706}
1707
1708/*
1709 * generate_setop_grouplist
1710 * Build a SortGroupClause list defining the sort/grouping properties
1711 * of the setop's output columns.
1712 *
1713 * Parse analysis already determined the properties and built a suitable
1714 * list, except that the entries do not have sortgrouprefs set because
1715 * the parser output representation doesn't include a tlist for each
1716 * setop. So what we need to do here is copy that list and install
1717 * proper sortgrouprefs into it (copying those from the targetlist).
1718 */
1719static List *
1721{
1722 List *grouplist = copyObject(op->groupClauses);
1723 ListCell *lg;
1724 ListCell *lt;
1725
1726 lg = list_head(grouplist);
1727 foreach(lt, targetlist)
1728 {
1729 TargetEntry *tle = (TargetEntry *) lfirst(lt);
1730 SortGroupClause *sgc;
1731
1732 Assert(!tle->resjunk);
1733
1734 /* non-resjunk columns should have sortgroupref = resno */
1735 Assert(tle->ressortgroupref == tle->resno);
1736
1737 /* non-resjunk columns should have grouping clauses */
1738 Assert(lg != NULL);
1739 sgc = (SortGroupClause *) lfirst(lg);
1740 lg = lnext(grouplist, lg);
1741 Assert(sgc->tleSortGroupRef == 0);
1742
1743 sgc->tleSortGroupRef = tle->ressortgroupref;
1744 }
1745 Assert(lg == NULL);
1746 return grouplist;
1747}
1748
1749/*
1750 * create_setop_pathtarget
1751 * Do the normal create_pathtarget() work, plus set the resulting
1752 * PathTarget's width to the average width of the Paths in child_pathlist
1753 * weighted using the estimated row count of each path.
1754 *
1755 * Note: This is required because set op target lists use varno==0, which
1756 * results in a type default width estimate rather than one that's based on
1757 * statistics of the columns from the set op children.
1758 */
1759static PathTarget *
1761{
1762 PathTarget *reltarget;
1763 ListCell *lc;
1764 double parent_rows = 0;
1765 double parent_size = 0;
1766
1767 reltarget = create_pathtarget(root, tlist);
1768
1769 /* Calculate the total rows and total size. */
1770 foreach(lc, child_pathlist)
1771 {
1772 Path *path = (Path *) lfirst(lc);
1773
1774 parent_rows += path->rows;
1775 parent_size += path->parent->reltarget->width * path->rows;
1776 }
1777
1778 if (parent_rows > 0)
1779 reltarget->width = rint(parent_size / parent_rows);
1780
1781 return reltarget;
1782}
int16 AttrNumber
Definition: attnum.h:21
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:917
Bitmapset * bms_union(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:251
#define bms_is_empty(a)
Definition: bitmapset.h:118
#define Min(x, y)
Definition: c.h:1008
#define Max(x, y)
Definition: c.h:1002
int32_t int32
Definition: c.h:539
unsigned int Index
Definition: c.h:624
int max_parallel_workers_per_gather
Definition: costsize.c:143
void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel)
Definition: costsize.c:5912
bool enable_parallel_append
Definition: costsize.c:161
bool enable_incremental_sort
Definition: costsize.c:151
int errdetail(const char *fmt,...)
Definition: elog.c:1216
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
void add_setop_child_rel_equivalences(PlannerInfo *root, RelOptInfo *child_rel, List *child_tlist, List *setop_pathkeys)
Definition: equivclass.c:3084
Assert(PointerIsAligned(start, uint64))
bool is_dummy_rel(RelOptInfo *rel)
Definition: joinrels.c:1464
void mark_dummy_rel(RelOptInfo *rel)
Definition: joinrels.c:1513
List * lappend(List *list, void *datum)
Definition: list.c:339
List * list_delete_first(List *list)
Definition: list.c:943
List * lappend_int(List *list, int datum)
Definition: list.c:357
List * lcons(void *datum, List *list)
Definition: list.c:495
Datum subpath(PG_FUNCTION_ARGS)
Definition: ltree_op.c:311
Var * makeVar(int varno, AttrNumber varattno, Oid vartype, int32 vartypmod, Oid varcollid, Index varlevelsup)
Definition: makefuncs.c:66
TargetEntry * makeTargetEntry(Expr *expr, AttrNumber resno, char *resname, bool resjunk)
Definition: makefuncs.c:289
char * pstrdup(const char *in)
Definition: mcxt.c:1759
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc(Size size)
Definition: mcxt.c:1365
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:301
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:821
Node * applyRelabelType(Node *arg, Oid rtype, int32 rtypmod, Oid rcollid, CoercionForm rformat, int rlocation, bool overwrite_ok)
Definition: nodeFuncs.c:636
SetOpCmd
Definition: nodes.h:407
@ SETOPCMD_EXCEPT
Definition: nodes.h:410
@ SETOPCMD_EXCEPT_ALL
Definition: nodes.h:411
@ SETOPCMD_INTERSECT_ALL
Definition: nodes.h:409
@ SETOPCMD_INTERSECT
Definition: nodes.h:408
@ SETOP_HASHED
Definition: nodes.h:417
@ SETOP_SORTED
Definition: nodes.h:416
#define IsA(nodeptr, _type_)
Definition: nodes.h:164
#define copyObject(obj)
Definition: nodes.h:232
#define nodeTag(nodeptr)
Definition: nodes.h:139
@ AGG_HASHED
Definition: nodes.h:366
@ AGGSPLIT_SIMPLE
Definition: nodes.h:387
#define castNode(_type_, nodeptr)
Definition: nodes.h:182
Node * coerce_to_common_type(ParseState *pstate, Node *node, Oid targetTypeId, const char *context)
@ SETOP_INTERSECT
Definition: parsenodes.h:2178
@ SETOP_UNION
Definition: parsenodes.h:2177
@ SETOP_EXCEPT
Definition: parsenodes.h:2179
@ RTE_SUBQUERY
Definition: parsenodes.h:1044
Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, bool require_parallel_safe)
Definition: pathkeys.c:620
bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
Definition: pathkeys.c:558
List * make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist)
Definition: pathkeys.c:1336
List * convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *subquery_pathkeys, List *subquery_tlist)
Definition: pathkeys.c:1054
bool pathkeys_contained_in(List *keys1, List *keys2)
Definition: pathkeys.c:343
SetOpPath * create_setop_path(PlannerInfo *root, RelOptInfo *rel, Path *leftpath, Path *rightpath, SetOpCmd cmd, SetOpStrategy strategy, List *groupList, double numGroups, double outputRows)
Definition: pathnode.c:3403
ProjectionPath * create_projection_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, PathTarget *target)
Definition: pathnode.c:2524
Path * apply_projection_to_path(PlannerInfo *root, RelOptInfo *rel, Path *path, PathTarget *target)
Definition: pathnode.c:2633
void set_cheapest(RelOptInfo *parent_rel)
Definition: pathnode.c:270
void add_partial_path(RelOptInfo *parent_rel, Path *new_path)
Definition: pathnode.c:795
AppendPath * create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *partial_subpaths, List *pathkeys, Relids required_outer, int parallel_workers, bool parallel_aware, double rows)
Definition: pathnode.c:1300
SubqueryScanPath * create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, bool trivial_pathtarget, List *pathkeys, Relids required_outer)
Definition: pathnode.c:1846
IncrementalSortPath * create_incremental_sort_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *pathkeys, int presorted_keys, double limit_tuples)
Definition: pathnode.c:2792
SortPath * create_sort_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *pathkeys, double limit_tuples)
Definition: pathnode.c:2841
GatherPath * create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, PathTarget *target, Relids required_outer, double *rows)
Definition: pathnode.c:1802
void add_path(RelOptInfo *parent_rel, Path *new_path)
Definition: pathnode.c:461
UniquePath * create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, int numCols, double numGroups)
Definition: pathnode.c:2942
AggPath * create_agg_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, PathTarget *target, AggStrategy aggstrategy, AggSplit aggsplit, List *groupClause, List *qual, const AggClauseCosts *aggcosts, double numGroups)
Definition: pathnode.c:2994
RecursiveUnionPath * create_recursiveunion_path(PlannerInfo *root, RelOptInfo *rel, Path *leftpath, Path *rightpath, PathTarget *target, List *distinctList, int wtParam, double numGroups)
Definition: pathnode.c:3522
MergeAppendPath * create_merge_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *pathkeys, Relids required_outer)
Definition: pathnode.c:1471
@ TOTAL_COST
Definition: pathnodes.h:38
@ UPPERREL_SETOP
Definition: pathnodes.h:71
@ UPPERREL_FINAL
Definition: pathnodes.h:79
@ RELOPT_UPPER_REL
Definition: pathnodes.h:887
static int pg_leftmost_one_pos32(uint32 word)
Definition: pg_bitutils.h:41
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define lfirst_int(lc)
Definition: pg_list.h:173
#define list_make1(x1)
Definition: pg_list.h:212
#define forthree(cell1, list1, cell2, list2, cell3, list3)
Definition: pg_list.h:563
#define linitial(l)
Definition: pg_list.h:178
#define forfour(cell1, list1, cell2, list2, cell3, list3, cell4, list4)
Definition: pg_list.h:575
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
#define lfirst_oid(lc)
Definition: pg_list.h:174
#define list_make2(x1, x2)
Definition: pg_list.h:214
char * choose_plan_name(PlannerGlobal *glob, const char *name, bool always_number)
Definition: planner.c:8961
PlannerInfo * subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name, PlannerInfo *parent_root, bool hasRecursion, double tuple_fraction, SetOperationStmt *setops)
Definition: planner.c:693
create_upper_paths_hook_type create_upper_paths_hook
Definition: planner.c:83
unsigned int Oid
Definition: postgres_ext.h:32
RelOptInfo * plan_set_operations(PlannerInfo *root)
Definition: prepunion.c:97
static List * generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
Definition: prepunion.c:1720
static RelOptInfo * generate_union_paths(SetOperationStmt *op, PlannerInfo *root, List *refnames_tlist, List **pTargetList)
Definition: prepunion.c:690
static RelOptInfo * generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, List *refnames_tlist, List **pTargetList)
Definition: prepunion.c:1050
static PathTarget * create_setop_pathtarget(PlannerInfo *root, List *tlist, List *child_pathlist)
Definition: prepunion.c:1760
static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
Definition: prepunion.c:1457
static List * generate_setop_tlist(List *colTypes, List *colCollations, Index varno, bool hack_constants, List *input_tlist, List *refnames_tlist, bool *trivial_tlist)
Definition: prepunion.c:1483
static RelOptInfo * recurse_set_operations(Node *setOp, PlannerInfo *root, SetOperationStmt *parentOp, List *colTypes, List *colCollations, List *refnames_tlist, List **pTargetList, bool *istrivial_tlist)
Definition: prepunion.c:213
static RelOptInfo * generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root, List *refnames_tlist, List **pTargetList)
Definition: prepunion.c:364
static List * generate_append_tlist(List *colTypes, List *colCollations, List *input_tlists, List *refnames_tlist)
Definition: prepunion.c:1611
static List * plan_union_children(PlannerInfo *root, SetOperationStmt *top_union, List *refnames_tlist, List **tlist_list, List **istrivial_tlist)
Definition: prepunion.c:1395
static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel, bool trivial_tlist, List *child_tlist, List *interesting_pathkeys, double *pNumGroups)
Definition: prepunion.c:488
@ COERCE_IMPLICIT_CAST
Definition: primnodes.h:768
tree ctl root
Definition: radixtree.h:1857
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
Definition: regcomp.c:717
void setup_simple_rel_arrays(PlannerInfo *root)
Definition: relnode.c:108
RelOptInfo * fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids)
Definition: relnode.c:1581
RelOptInfo * build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
Definition: relnode.c:206
double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo)
Definition: selfuncs.c:3768
void check_stack_depth(void)
Definition: stack_depth.c:95
Definition: pg_list.h:54
Definition: nodes.h:135
List * pathkeys
Definition: pathnodes.h:1912
Cardinality rows
Definition: pathnodes.h:1906
List * processed_tlist
Definition: pathnodes.h:499
bool hasHavingQual
Definition: pathnodes.h:539
Query * parse
Definition: pathnodes.h:227
Cardinality limit_tuples
Definition: pathnodes.h:526
List * setop_pathkeys
Definition: pathnodes.h:441
List * groupClause
Definition: parsenodes.h:216
List * targetList
Definition: parsenodes.h:198
List * groupingSets
Definition: parsenodes.h:220
List * distinctClause
Definition: parsenodes.h:226
Query * subquery
Definition: parsenodes.h:1135
Relids relids
Definition: pathnodes.h:927
struct PathTarget * reltarget
Definition: pathnodes.h:949
bool consider_parallel
Definition: pathnodes.h:943
Relids lateral_relids
Definition: pathnodes.h:968
List * pathlist
Definition: pathnodes.h:954
struct Path * cheapest_total_path
Definition: pathnodes.h:958
bool consider_startup
Definition: pathnodes.h:939
List * partial_pathlist
Definition: pathnodes.h:956
PlannerInfo * subroot
Definition: pathnodes.h:1004
Cardinality rows
Definition: pathnodes.h:933
RTEKind rtekind
Definition: pathnodes.h:977
SetOperation op
Definition: parsenodes.h:2255
Index tleSortGroupRef
Definition: parsenodes.h:1469
Expr * expr
Definition: primnodes.h:2239
AttrNumber resno
Definition: primnodes.h:2241
Index ressortgroupref
Definition: primnodes.h:2245
bool tlist_same_collations(List *tlist, List *colCollations, bool junkOK)
Definition: tlist.c:282
bool grouping_is_sortable(List *groupClause)
Definition: tlist.c:540
List * make_tlist_from_pathtarget(PathTarget *target)
Definition: tlist.c:624
List * get_tlist_exprs(List *tlist, bool includeJunk)
Definition: tlist.c:163
bool grouping_is_hashable(List *groupClause)
Definition: tlist.c:560
bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK)
Definition: tlist.c:248
#define create_pathtarget(root, tlist)
Definition: tlist.h:53