@@ -161,6 +161,7 @@ static Selectivity get_foreign_key_join_selectivity(PlannerInfo *root,
 static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
+static double get_parallel_divisor(Path *path);
 
 
 /*
@@ -238,32 +239,7 @@ cost_seqscan(Path *path, PlannerInfo *root,
 	/* Adjust costing for parallelism, if used. */
 	if (path->parallel_workers > 0)
 	{
-		double		parallel_divisor = path->parallel_workers;
-		double		leader_contribution;
-
-		/*
-		 * Early experience with parallel query suggests that when there is
-		 * only one worker, the leader often makes a very substantial
-		 * contribution to executing the parallel portion of the plan, but as
-		 * more workers are added, it does less and less, because it's busy
-		 * reading tuples from the workers and doing whatever non-parallel
-		 * post-processing is needed. By the time we reach 4 workers, the
-		 * leader no longer makes a meaningful contribution. Thus, for now,
-		 * estimate that the leader spends 30% of its time servicing each
-		 * worker, and the remainder executing the parallel plan.
-		 */
-		leader_contribution = 1.0 - (0.3 * path->parallel_workers);
-		if (leader_contribution > 0)
-			parallel_divisor += leader_contribution;
-
-		/*
-		 * In the case of a parallel plan, the row count needs to represent
-		 * the number of tuples processed per worker. Otherwise, higher-level
-		 * plan nodes that appear below the gather will be costed incorrectly,
-		 * because they'll anticipate receiving more rows than any given copy
-		 * will actually get.
-		 */
-		path->rows = clamp_row_est(path->rows / parallel_divisor);
+		double		parallel_divisor = get_parallel_divisor(path);
 
 		/* The CPU cost is divided among all the workers. */
 		cpu_run_cost /= parallel_divisor;
@@ -274,6 +250,12 @@ cost_seqscan(Path *path, PlannerInfo *root,
 		 * prefetching. For now, we assume that the disk run cost can't be
 		 * amortized at all.
 		 */
+
+		/*
+		 * In the case of a parallel plan, the row count needs to represent
+		 * the number of tuples processed per worker.
+		 */
+		path->rows = clamp_row_est(path->rows / parallel_divisor);
 	}
 
 	path->startup_cost = startup_cost;
@@ -2013,6 +1995,10 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
 	else
 		path->path.rows = path->path.parent->rows;
 
+	/* For partial paths, scale row estimate. */
+	if (path->path.parallel_workers > 0)
+		path->path.rows /= get_parallel_divisor(&path->path);
+
 	/*
 	 * We could include disable_cost in the preliminary estimate, but that
 	 * would amount to optimizing for the case where the join method is
@@ -2431,6 +2417,10 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
 	else
 		path->jpath.path.rows = path->jpath.path.parent->rows;
 
+	/* For partial paths, scale row estimate. */
+	if (path->jpath.path.parallel_workers > 0)
+		path->jpath.path.rows /= get_parallel_divisor(&path->jpath.path);
+
 	/*
 	 * We could include disable_cost in the preliminary estimate, but that
 	 * would amount to optimizing for the case where the join method is
@@ -2810,6 +2800,10 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 	else
 		path->jpath.path.rows = path->jpath.path.parent->rows;
 
+	/* For partial paths, scale row estimate. */
+	if (path->jpath.path.parallel_workers > 0)
+		path->jpath.path.rows /= get_parallel_divisor(&path->jpath.path);
+
 	/*
 	 * We could include disable_cost in the preliminary estimate, but that
 	 * would amount to optimizing for the case where the join method is
@@ -4798,3 +4792,31 @@ page_size(double tuples, int width)
 {
 	return ceil(relation_byte_size(tuples, width) / BLCKSZ);
 }
+
+/*
+ * Estimate the fraction of the work that each worker will do given the
+ * number of workers budgeted for the path.
+ */
+static double
+get_parallel_divisor(Path *path)
+{
+	double		parallel_divisor = path->parallel_workers;
+	double		leader_contribution;
+
+	/*
+	 * Early experience with parallel query suggests that when there is only
+	 * one worker, the leader often makes a very substantial contribution to
+	 * executing the parallel portion of the plan, but as more workers are
+	 * added, it does less and less, because it's busy reading tuples from the
+	 * workers and doing whatever non-parallel post-processing is needed. By
+	 * the time we reach 4 workers, the leader no longer makes a meaningful
+	 * contribution. Thus, for now, estimate that the leader spends 30% of
+	 * its time servicing each worker, and the remainder executing the
+	 * parallel plan.
+	 */
+	leader_contribution = 1.0 - (0.3 * path->parallel_workers);
+	if (leader_contribution > 0)
+		parallel_divisor += leader_contribution;
+
+	return parallel_divisor;
+}
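
For a concrete sense of the numbers the new helper produces: under the 30%-per-worker leader estimate, the divisor is 1.7 for one worker (1 worker plus 0.7 of the leader), 2.4 for two, 3.1 for three, and 4.0 for four, at which point the leader's contribution bottoms out at zero. The standalone sketch below is not part of the commit; it uses a hypothetical SketchPath struct standing in for the one Path field the helper reads, and reproduces the divisor arithmetic plus the per-worker row scaling now applied in cost_seqscan and the join costing functions (the real code additionally rounds and clamps via clamp_row_est).

/*
 * Illustrative sketch only, not PostgreSQL source: reproduces the arithmetic
 * of get_parallel_divisor() and the per-worker row scaling from the patch.
 */
#include <stdio.h>

typedef struct SketchPath			/* hypothetical stand-in for Path */
{
	int			parallel_workers;	/* workers budgeted for the path */
	double		rows;				/* estimated total rows before scaling */
} SketchPath;

static double
sketch_parallel_divisor(const SketchPath *path)
{
	double		parallel_divisor = path->parallel_workers;
	/* Leader is assumed to spend 30% of its time servicing each worker. */
	double		leader_contribution = 1.0 - (0.3 * path->parallel_workers);

	if (leader_contribution > 0)
		parallel_divisor += leader_contribution;
	return parallel_divisor;
}

int
main(void)
{
	/* 10,000 estimated rows, spread over 1..4 budgeted workers. */
	for (int workers = 1; workers <= 4; workers++)
	{
		SketchPath	path = {workers, 10000.0};
		double		divisor = sketch_parallel_divisor(&path);

		printf("workers=%d  divisor=%.1f  rows per worker=%.0f\n",
			   workers, divisor, path.rows / divisor);
	}
	return 0;
}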