@@ -4654,22 +4654,63 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
46544654 cheapest_partial_path -> rows ,
46554655 NULL , NULL );
46564656
4657- /* first try adding unique paths atop of sorted paths */
4657+ /*
4658+ * Try sorting the cheapest path and incrementally sorting any paths with
4659+ * presorted keys and put unique paths atop of those.
4660+ */
46584661 if (grouping_is_sortable (parse -> distinctClause ))
46594662 {
46604663 foreach (lc , input_rel -> partial_pathlist )
46614664 {
4662- Path * path = (Path * ) lfirst (lc );
4665+ Path * input_path = (Path * ) lfirst (lc );
4666+ Path * sorted_path ;
4667+ bool is_sorted ;
4668+ int presorted_keys ;
46634669
4664- if (pathkeys_contained_in (root -> distinct_pathkeys , path -> pathkeys ))
4670+ is_sorted = pathkeys_count_contained_in (root -> distinct_pathkeys ,
4671+ input_path -> pathkeys ,
4672+ & presorted_keys );
4673+
4674+ if (is_sorted )
4675+ sorted_path = input_path ;
4676+ else
46654677 {
4666- add_partial_path (partial_distinct_rel , (Path * )
4667- create_upper_unique_path (root ,
4668- partial_distinct_rel ,
4669- path ,
4670- list_length (root -> distinct_pathkeys ),
4671- numDistinctRows ));
4678+ /*
4679+ * Try at least sorting the cheapest path and also try
4680+ * incrementally sorting any path which is partially sorted
4681+ * already (no need to deal with paths which have presorted
4682+ * keys when incremental sort is disabled unless it's the
4683+ * cheapest partial path).
4684+ */
4685+ if (input_path != cheapest_partial_path &&
4686+ (presorted_keys == 0 || !enable_incremental_sort ))
4687+ continue ;
4688+
4689+ /*
4690+ * We've no need to consider both a sort and incremental sort.
4691+ * We'll just do a sort if there are no presorted keys and an
4692+ * incremental sort when there are presorted keys.
4693+ */
4694+ if (presorted_keys == 0 || !enable_incremental_sort )
4695+ sorted_path = (Path * ) create_sort_path (root ,
4696+ partial_distinct_rel ,
4697+ input_path ,
4698+ root -> distinct_pathkeys ,
4699+ -1.0 );
4700+ else
4701+ sorted_path = (Path * ) create_incremental_sort_path (root ,
4702+ partial_distinct_rel ,
4703+ input_path ,
4704+ root -> distinct_pathkeys ,
4705+ presorted_keys ,
4706+ -1.0 );
46724707 }
4708+
4709+ add_partial_path (partial_distinct_rel , (Path * )
4710+ create_upper_unique_path (root , partial_distinct_rel ,
4711+ sorted_path ,
4712+ list_length (root -> distinct_pathkeys ),
4713+ numDistinctRows ));
46734714 }
46744715 }
46754716
@@ -4773,9 +4814,11 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
47734814 if (grouping_is_sortable (parse -> distinctClause ))
47744815 {
47754816 /*
4776- * First, if we have any adequately-presorted paths, just stick a
4777- * Unique node on those. Then consider doing an explicit sort of the
4778- * cheapest input path and Unique'ing that.
4817+ * Firstly, if we have any adequately-presorted paths, just stick a
4818+ * Unique node on those. We also consider doing an explicit sort of
4819+ * the cheapest input path and Unique'ing that. If any paths have
4820+ * presorted keys then we'll create an incremental sort atop of those
4821+ * before adding a unique node on the top.
47794822 *
47804823 * When we have DISTINCT ON, we must sort by the more rigorous of
47814824 * DISTINCT and ORDER BY, else it won't have the desired behavior.
@@ -4785,8 +4828,8 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
47854828 * the other.)
47864829 */
47874830 List * needed_pathkeys ;
4788- Path * path ;
47894831 ListCell * lc ;
4832+ double limittuples = root -> distinct_pathkeys == NIL ? 1.0 : -1.0 ;
47904833
47914834 if (parse -> hasDistinctOn &&
47924835 list_length (root -> distinct_pathkeys ) <
@@ -4797,96 +4840,89 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
47974840
47984841 foreach (lc , input_rel -> pathlist )
47994842 {
4800- path = (Path * ) lfirst (lc );
4843+ Path * input_path = (Path * ) lfirst (lc );
4844+ Path * sorted_path ;
4845+ bool is_sorted ;
4846+ int presorted_keys ;
48014847
4802- if (pathkeys_contained_in (needed_pathkeys , path -> pathkeys ))
4848+ is_sorted = pathkeys_count_contained_in (needed_pathkeys ,
4849+ input_path -> pathkeys ,
4850+ & presorted_keys );
4851+
4852+ if (is_sorted )
4853+ sorted_path = input_path ;
4854+ else
48034855 {
48044856 /*
4805- * distinct_pathkeys may have become empty if all of the
4806- * pathkeys were determined to be redundant. If all of the
4807- * pathkeys are redundant then each DISTINCT target must only
4808- * allow a single value, therefore all resulting tuples must
4809- * be identical (or at least indistinguishable by an equality
4810- * check). We can uniquify these tuples simply by just taking
4811- * the first tuple. All we do here is add a path to do "LIMIT
4812- * 1" atop of 'path'. When doing a DISTINCT ON we may still
4813- * have a non-NIL sort_pathkeys list, so we must still only do
4814- * this with paths which are correctly sorted by
4815- * sort_pathkeys.
4857+ * Try at least sorting the cheapest path and also try
4858+ * incrementally sorting any path which is partially sorted
4859+ * already (no need to deal with paths which have presorted
4860+ * keys when incremental sort is disabled unless it's the
4861+ * cheapest input path).
48164862 */
4817- if (root -> distinct_pathkeys == NIL )
4818- {
4819- Node * limitCount ;
4820-
4821- limitCount = (Node * ) makeConst (INT8OID , -1 , InvalidOid ,
4822- sizeof (int64 ),
4823- Int64GetDatum (1 ), false,
4824- FLOAT8PASSBYVAL );
4863+ if (input_path != cheapest_input_path &&
4864+ (presorted_keys == 0 || !enable_incremental_sort ))
4865+ continue ;
48254866
4826- /*
4827- * If the query already has a LIMIT clause, then we could
4828- * end up with a duplicate LimitPath in the final plan.
4829- * That does not seem worth troubling over too much.
4830- */
4831- add_path (distinct_rel , (Path * )
4832- create_limit_path (root , distinct_rel , path , NULL ,
4833- limitCount , LIMIT_OPTION_COUNT ,
4834- 0 , 1 ));
4835- }
4867+ /*
4868+ * We've no need to consider both a sort and incremental sort.
4869+ * We'll just do a sort if there are no presorted keys and an
4870+ * incremental sort when there are presorted keys.
4871+ */
4872+ if (presorted_keys == 0 || !enable_incremental_sort )
4873+ sorted_path = (Path * ) create_sort_path (root ,
4874+ distinct_rel ,
4875+ input_path ,
4876+ needed_pathkeys ,
4877+ limittuples );
48364878 else
4837- {
4838- add_path (distinct_rel , (Path * )
4839- create_upper_unique_path (root , distinct_rel ,
4840- path ,
4841- list_length (root -> distinct_pathkeys ),
4842- numDistinctRows ));
4843- }
4879+ sorted_path = (Path * ) create_incremental_sort_path (root ,
4880+ distinct_rel ,
4881+ input_path ,
4882+ needed_pathkeys ,
4883+ presorted_keys ,
4884+ limittuples );
48444885 }
4845- }
48464886
4847- /* For explicit-sort case, always use the more rigorous clause */
4848- if (list_length (root -> distinct_pathkeys ) <
4849- list_length (root -> sort_pathkeys ))
4850- {
4851- needed_pathkeys = root -> sort_pathkeys ;
4852- /* Assert checks that parser didn't mess up... */
4853- Assert (pathkeys_contained_in (root -> distinct_pathkeys ,
4854- needed_pathkeys ));
4855- }
4856- else
4857- needed_pathkeys = root -> distinct_pathkeys ;
4887+ /*
4888+ * distinct_pathkeys may have become empty if all of the pathkeys
4889+ * were determined to be redundant. If all of the pathkeys are
4890+ * redundant then each DISTINCT target must only allow a single
4891+ * value, therefore all resulting tuples must be identical (or at
4892+ * least indistinguishable by an equality check). We can uniquify
4893+ * these tuples simply by just taking the first tuple. All we do
4894+ * here is add a path to do "LIMIT 1" atop of 'sorted_path'. When
4895+ * doing a DISTINCT ON we may still have a non-NIL sort_pathkeys
4896+ * list, so we must still only do this with paths which are
4897+ * correctly sorted by sort_pathkeys.
4898+ */
4899+ if (root -> distinct_pathkeys == NIL )
4900+ {
4901+ Node * limitCount ;
48584902
4859- path = cheapest_input_path ;
4860- if (!pathkeys_contained_in (needed_pathkeys , path -> pathkeys ))
4861- path = (Path * ) create_sort_path (root , distinct_rel ,
4862- path ,
4863- needed_pathkeys ,
4864- root -> distinct_pathkeys == NIL ?
4865- 1.0 : -1.0 );
4903+ limitCount = (Node * ) makeConst (INT8OID , -1 , InvalidOid ,
4904+ sizeof (int64 ),
4905+ Int64GetDatum (1 ), false,
4906+ FLOAT8PASSBYVAL );
48664907
4867- /*
4868- * As above, use a LimitPath instead of a UniquePath when all of the
4869- * distinct_pathkeys are redundant and we're only going to get a
4870- * series of tuples all with the same values anyway.
4871- */
4872- if (root -> distinct_pathkeys == NIL )
4873- {
4874- Node * limitCount = (Node * ) makeConst (INT8OID , -1 , InvalidOid ,
4875- sizeof (int64 ),
4876- Int64GetDatum (1 ), false,
4877- FLOAT8PASSBYVAL );
4878-
4879- add_path (distinct_rel , (Path * )
4880- create_limit_path (root , distinct_rel , path , NULL ,
4881- limitCount , LIMIT_OPTION_COUNT , 0 , 1 ));
4882- }
4883- else
4884- {
4885- add_path (distinct_rel , (Path * )
4886- create_upper_unique_path (root , distinct_rel ,
4887- path ,
4888- list_length (root -> distinct_pathkeys ),
4889- numDistinctRows ));
4908+ /*
4909+ * If the query already has a LIMIT clause, then we could end
4910+ * up with a duplicate LimitPath in the final plan. That does
4911+ * not seem worth troubling over too much.
4912+ */
4913+ add_path (distinct_rel , (Path * )
4914+ create_limit_path (root , distinct_rel , sorted_path ,
4915+ NULL , limitCount ,
4916+ LIMIT_OPTION_COUNT , 0 , 1 ));
4917+ }
4918+ else
4919+ {
4920+ add_path (distinct_rel , (Path * )
4921+ create_upper_unique_path (root , distinct_rel ,
4922+ sorted_path ,
4923+ list_length (root -> distinct_pathkeys ),
4924+ numDistinctRows ));
4925+ }
48904926 }
48914927 }
48924928
0 commit comments