@@ -189,6 +189,12 @@ static void create_one_window_path(PlannerInfo *root,
189189 List * activeWindows );
190190static RelOptInfo * create_distinct_paths (PlannerInfo * root ,
191191 RelOptInfo * input_rel );
192+ static void create_partial_distinct_paths (PlannerInfo * root ,
193+ RelOptInfo * input_rel ,
194+ RelOptInfo * final_distinct_rel );
195+ static RelOptInfo * create_final_distinct_paths (PlannerInfo * root ,
196+ RelOptInfo * input_rel ,
197+ RelOptInfo * distinct_rel );
192198static RelOptInfo * create_ordered_paths (PlannerInfo * root ,
193199 RelOptInfo * input_rel ,
194200 PathTarget * target ,
@@ -1570,6 +1576,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
15701576 */
15711577 root -> upper_targets [UPPERREL_FINAL ] = final_target ;
15721578 root -> upper_targets [UPPERREL_ORDERED ] = final_target ;
1579+ root -> upper_targets [UPPERREL_PARTIAL_DISTINCT ] = sort_input_target ;
15731580 root -> upper_targets [UPPERREL_DISTINCT ] = sort_input_target ;
15741581 root -> upper_targets [UPPERREL_WINDOW ] = sort_input_target ;
15751582 root -> upper_targets [UPPERREL_GROUP_AGG ] = grouping_target ;
@@ -4227,16 +4234,9 @@ create_one_window_path(PlannerInfo *root,
42274234 * Sort/Unique won't project anything.
42284235 */
42294236static RelOptInfo *
4230- create_distinct_paths (PlannerInfo * root ,
4231- RelOptInfo * input_rel )
4237+ create_distinct_paths (PlannerInfo * root , RelOptInfo * input_rel )
42324238{
4233- Query * parse = root -> parse ;
4234- Path * cheapest_input_path = input_rel -> cheapest_total_path ;
42354239 RelOptInfo * distinct_rel ;
4236- double numDistinctRows ;
4237- bool allow_hash ;
4238- Path * path ;
4239- ListCell * lc ;
42404240
42414241 /* For now, do all work in the (DISTINCT, NULL) upperrel */
42424242 distinct_rel = fetch_upper_rel (root , UPPERREL_DISTINCT , NULL );
@@ -4258,6 +4258,184 @@ create_distinct_paths(PlannerInfo *root,
42584258 distinct_rel -> useridiscurrent = input_rel -> useridiscurrent ;
42594259 distinct_rel -> fdwroutine = input_rel -> fdwroutine ;
42604260
4261+ /* build distinct paths based on input_rel's pathlist */
4262+ create_final_distinct_paths (root , input_rel , distinct_rel );
4263+
4264+ /* now build distinct paths based on input_rel's partial_pathlist */
4265+ create_partial_distinct_paths (root , input_rel , distinct_rel );
4266+
4267+ /* Give a helpful error if we failed to create any paths */
4268+ if (distinct_rel -> pathlist == NIL )
4269+ ereport (ERROR ,
4270+ (errcode (ERRCODE_FEATURE_NOT_SUPPORTED ),
4271+ errmsg ("could not implement DISTINCT" ),
4272+ errdetail ("Some of the datatypes only support hashing, while others only support sorting." )));
4273+
4274+ /*
4275+ * If there is an FDW that's responsible for all baserels of the query,
4276+ * let it consider adding ForeignPaths.
4277+ */
4278+ if (distinct_rel -> fdwroutine &&
4279+ distinct_rel -> fdwroutine -> GetForeignUpperPaths )
4280+ distinct_rel -> fdwroutine -> GetForeignUpperPaths (root ,
4281+ UPPERREL_DISTINCT ,
4282+ input_rel ,
4283+ distinct_rel ,
4284+ NULL );
4285+
4286+ /* Let extensions possibly add some more paths */
4287+ if (create_upper_paths_hook )
4288+ (* create_upper_paths_hook ) (root , UPPERREL_DISTINCT , input_rel ,
4289+ distinct_rel , NULL );
4290+
4291+ /* Now choose the best path(s) */
4292+ set_cheapest (distinct_rel );
4293+
4294+ return distinct_rel ;
4295+ }
4296+
4297+ /*
4298+ * create_partial_distinct_paths
4299+ *
4300+ * Process 'input_rel' partial paths and add unique/aggregate paths to the
4301+ * UPPERREL_PARTIAL_DISTINCT rel. For paths created, add Gather/GatherMerge paths
4302+ * on top, then add final unique/aggregate paths to 'final_distinct_rel' to remove
4303+ * any duplicates produced from combining rows from parallel workers.
4304+ */
4305+ static void
4306+ create_partial_distinct_paths (PlannerInfo * root , RelOptInfo * input_rel ,
4307+ RelOptInfo * final_distinct_rel )
4308+ {
4309+ RelOptInfo * partial_distinct_rel ;
4310+ Query * parse ;
4311+ List * distinctExprs ;
4312+ double numDistinctRows ;
4313+ Path * cheapest_partial_path ;
4314+ ListCell * lc ;
4315+
4316+ /* nothing to do when input_rel isn't marked consider_parallel or has no partial paths */
4317+ if (!input_rel -> consider_parallel || input_rel -> partial_pathlist == NIL )
4318+ return ;
4319+
4320+ parse = root -> parse ;
4321+
4322+ /* can't do parallel DISTINCT ON */
4323+ if (parse -> hasDistinctOn )
4324+ return ;
4325+
4326+ partial_distinct_rel = fetch_upper_rel (root , UPPERREL_PARTIAL_DISTINCT ,
4327+ NULL );
4328+ partial_distinct_rel -> reltarget = root -> upper_targets [UPPERREL_PARTIAL_DISTINCT ];
4329+ partial_distinct_rel -> consider_parallel = input_rel -> consider_parallel ;
4330+
4331+ /*
4332+ * If input_rel belongs to a single FDW, so does the partial_distinct_rel.
4333+ */
4334+ partial_distinct_rel -> serverid = input_rel -> serverid ;
4335+ partial_distinct_rel -> userid = input_rel -> userid ;
4336+ partial_distinct_rel -> useridiscurrent = input_rel -> useridiscurrent ;
4337+ partial_distinct_rel -> fdwroutine = input_rel -> fdwroutine ;
4338+
4339+ cheapest_partial_path = linitial (input_rel -> partial_pathlist ); /* NOTE(review): assumes the list's first entry is the cheapest partial path -- confirm */
4340+
4341+ distinctExprs = get_sortgrouplist_exprs (parse -> distinctClause ,
4342+ parse -> targetList );
4343+
4344+ /* estimate how many distinct rows we'll get from each worker, using the cheapest partial path's row count */
4345+ numDistinctRows = estimate_num_groups (root , distinctExprs ,
4346+ cheapest_partial_path -> rows ,
4347+ NULL , NULL );
4348+
4349+ /* first try adding unique paths atop partial paths whose ordering already covers distinct_pathkeys */
4350+ if (grouping_is_sortable (parse -> distinctClause ))
4351+ {
4352+ foreach (lc , input_rel -> partial_pathlist )
4353+ {
4354+ Path * path = (Path * ) lfirst (lc );
4355+
4356+ if (pathkeys_contained_in (root -> distinct_pathkeys , path -> pathkeys ))
4357+ {
4358+ add_partial_path (partial_distinct_rel , (Path * )
4359+ create_upper_unique_path (root ,
4360+ partial_distinct_rel ,
4361+ path ,
4362+ list_length (root -> distinct_pathkeys ),
4363+ numDistinctRows ));
4364+ }
4365+ }
4366+ }
4367+
4368+ /*
4369+ * Now try hash aggregate paths, if enabled and hashing is possible. Since
4370+ * we're not on the hook to ensure we do our best to create at least one
4371+ * path here, we treat enable_hashagg as a hard off-switch rather than the
4372+ * slightly softer variant in create_final_distinct_paths.
4373+ */
4374+ if (enable_hashagg && grouping_is_hashable (parse -> distinctClause ))
4375+ {
4376+ add_partial_path (partial_distinct_rel , (Path * ) /* only the cheapest partial path is considered as hash input */
4377+ create_agg_path (root ,
4378+ partial_distinct_rel ,
4379+ cheapest_partial_path ,
4380+ cheapest_partial_path -> pathtarget ,
4381+ AGG_HASHED ,
4382+ AGGSPLIT_SIMPLE ,
4383+ parse -> distinctClause ,
4384+ NIL ,
4385+ NULL ,
4386+ numDistinctRows ));
4387+ }
4388+
4389+ /*
4390+ * If there is an FDW that's responsible for all baserels of the query,
4391+ * let it consider adding ForeignPaths.
4392+ */
4393+ if (partial_distinct_rel -> fdwroutine &&
4394+ partial_distinct_rel -> fdwroutine -> GetForeignUpperPaths )
4395+ partial_distinct_rel -> fdwroutine -> GetForeignUpperPaths (root ,
4396+ UPPERREL_PARTIAL_DISTINCT ,
4397+ input_rel ,
4398+ partial_distinct_rel ,
4399+ NULL );
4400+
4401+ /* Let extensions possibly add some more partial paths */
4402+ if (create_upper_paths_hook )
4403+ (* create_upper_paths_hook ) (root , UPPERREL_PARTIAL_DISTINCT ,
4404+ input_rel , partial_distinct_rel , NULL );
4405+
4406+ if (partial_distinct_rel -> partial_pathlist != NIL ) /* skip the gather/final step when no partial distinct path was added */
4407+ {
4408+ generate_gather_paths (root , partial_distinct_rel , true);
4409+ set_cheapest (partial_distinct_rel );
4410+
4411+ /*
4412+ * Finally, create paths to distinctify the final result. This step
4413+ * is needed to remove any duplicates due to combining rows from
4414+ * parallel workers.
4415+ */
4416+ create_final_distinct_paths (root , partial_distinct_rel ,
4417+ final_distinct_rel );
4418+ }
4419+ }
4420+
4421+ /*
4422+ * create_final_distinct_paths
4423+ * Create distinct paths in 'distinct_rel' based on 'input_rel' pathlist
4424+ *
4425+ * input_rel: contains the source-data paths
4426+ * distinct_rel: destination relation for storing created paths
4427+ */
4428+ static RelOptInfo *
4429+ create_final_distinct_paths (PlannerInfo * root , RelOptInfo * input_rel ,
4430+ RelOptInfo * distinct_rel )
4431+ {
4432+ Query * parse = root -> parse ;
4433+ Path * cheapest_input_path = input_rel -> cheapest_total_path ;
4434+ double numDistinctRows ;
4435+ bool allow_hash ;
4436+ Path * path ;
4437+ ListCell * lc ;
4438+
42614439 /* Estimate number of distinct rows there will be */
42624440 if (parse -> groupClause || parse -> groupingSets || parse -> hasAggs ||
42634441 root -> hasHavingQual )
@@ -4384,31 +4562,6 @@ create_distinct_paths(PlannerInfo *root,
43844562 numDistinctRows ));
43854563 }
43864564
4387- /* Give a helpful error if we failed to find any implementation */
4388- if (distinct_rel -> pathlist == NIL )
4389- ereport (ERROR ,
4390- (errcode (ERRCODE_FEATURE_NOT_SUPPORTED ),
4391- errmsg ("could not implement DISTINCT" ),
4392- errdetail ("Some of the datatypes only support hashing, while others only support sorting." )));
4393-
4394- /*
4395- * If there is an FDW that's responsible for all baserels of the query,
4396- * let it consider adding ForeignPaths.
4397- */
4398- if (distinct_rel -> fdwroutine &&
4399- distinct_rel -> fdwroutine -> GetForeignUpperPaths )
4400- distinct_rel -> fdwroutine -> GetForeignUpperPaths (root , UPPERREL_DISTINCT ,
4401- input_rel , distinct_rel ,
4402- NULL );
4403-
4404- /* Let extensions possibly add some more paths */
4405- if (create_upper_paths_hook )
4406- (* create_upper_paths_hook ) (root , UPPERREL_DISTINCT ,
4407- input_rel , distinct_rel , NULL );
4408-
4409- /* Now choose the best path(s) */
4410- set_cheapest (distinct_rel );
4411-
44124565 return distinct_rel ;
44134566}
44144567
0 commit comments