@@ -161,6 +161,7 @@ static Selectivity get_foreign_key_join_selectivity(PlannerInfo *root,
 static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
+static double get_parallel_divisor(Path *path);
 
 
 /*
@@ -238,32 +239,7 @@ cost_seqscan(Path *path, PlannerInfo *root,
     /* Adjust costing for parallelism, if used. */
     if (path->parallel_workers > 0)
     {
-        double      parallel_divisor = path->parallel_workers;
-        double      leader_contribution;
-
-        /*
-         * Early experience with parallel query suggests that when there is
-         * only one worker, the leader often makes a very substantial
-         * contribution to executing the parallel portion of the plan, but as
-         * more workers are added, it does less and less, because it's busy
-         * reading tuples from the workers and doing whatever non-parallel
-         * post-processing is needed.  By the time we reach 4 workers, the
-         * leader no longer makes a meaningful contribution.  Thus, for now,
-         * estimate that the leader spends 30% of its time servicing each
-         * worker, and the remainder executing the parallel plan.
-         */
-        leader_contribution = 1.0 - (0.3 * path->parallel_workers);
-        if (leader_contribution > 0)
-            parallel_divisor += leader_contribution;
-
-        /*
-         * In the case of a parallel plan, the row count needs to represent
-         * the number of tuples processed per worker.  Otherwise, higher-level
-         * plan nodes that appear below the gather will be costed incorrectly,
-         * because they'll anticipate receiving more rows than any given copy
-         * will actually get.
-         */
-        path->rows = clamp_row_est(path->rows / parallel_divisor);
+        double      parallel_divisor = get_parallel_divisor(path);
 
         /* The CPU cost is divided among all the workers. */
         cpu_run_cost /= parallel_divisor;
@@ -274,6 +250,12 @@ cost_seqscan(Path *path, PlannerInfo *root,
         * prefetching.  For now, we assume that the disk run cost can't be
         * amortized at all.
         */
+
+        /*
+         * In the case of a parallel plan, the row count needs to represent
+         * the number of tuples processed per worker.
+         */
+        path->rows = clamp_row_est(path->rows / parallel_divisor);
     }
 
     path->startup_cost = startup_cost;
@@ -2014,6 +1996,10 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
     else
         path->path.rows = path->path.parent->rows;
 
+    /* For partial paths, scale row estimate. */
+    if (path->path.parallel_workers > 0)
+        path->path.rows /= get_parallel_divisor(&path->path);
+
     /*
      * We could include disable_cost in the preliminary estimate, but that
      * would amount to optimizing for the case where the join method is
@@ -2432,6 +2418,10 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
     else
         path->jpath.path.rows = path->jpath.path.parent->rows;
 
+    /* For partial paths, scale row estimate. */
+    if (path->jpath.path.parallel_workers > 0)
+        path->jpath.path.rows /= get_parallel_divisor(&path->jpath.path);
+
     /*
      * We could include disable_cost in the preliminary estimate, but that
      * would amount to optimizing for the case where the join method is
@@ -2811,6 +2801,10 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
     else
         path->jpath.path.rows = path->jpath.path.parent->rows;
 
+    /* For partial paths, scale row estimate. */
+    if (path->jpath.path.parallel_workers > 0)
+        path->jpath.path.rows /= get_parallel_divisor(&path->jpath.path);
+
     /*
      * We could include disable_cost in the preliminary estimate, but that
      * would amount to optimizing for the case where the join method is
@@ -4799,3 +4793,31 @@ page_size(double tuples, int width)
 {
     return ceil(relation_byte_size(tuples, width) / BLCKSZ);
 }
+
+/*
+ * Estimate the fraction of the work that each worker will do given the
+ * number of workers budgeted for the path.
+ */
+static double
+get_parallel_divisor(Path *path)
+{
+    double      parallel_divisor = path->parallel_workers;
+    double      leader_contribution;
+
+    /*
+     * Early experience with parallel query suggests that when there is only
+     * one worker, the leader often makes a very substantial contribution to
+     * executing the parallel portion of the plan, but as more workers are
+     * added, it does less and less, because it's busy reading tuples from the
+     * workers and doing whatever non-parallel post-processing is needed.  By
+     * the time we reach 4 workers, the leader no longer makes a meaningful
+     * contribution.  Thus, for now, estimate that the leader spends 30% of
+     * its time servicing each worker, and the remainder executing the
+     * parallel plan.
+     */
+    leader_contribution = 1.0 - (0.3 * path->parallel_workers);
+    if (leader_contribution > 0)
+        parallel_divisor += leader_contribution;
+
+    return parallel_divisor;
+}
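
As a quick numeric check of the leader-contribution heuristic and the per-worker row scaling this patch centralizes in `get_parallel_divisor()`, here is a small standalone sketch. It is not part of the patch: `divisor_for()`, `clamp_rows()`, and the 100000-row base estimate are invented for illustration, and `clamp_rows()` only approximates what `clamp_row_est()` does.

```c
#include <math.h>
#include <stdio.h>

/*
 * Illustrative stand-in for get_parallel_divisor(): the leader is assumed
 * to spend 30% of its time servicing each worker, so its own contribution
 * shrinks as workers are added and disappears at 4 or more workers.
 */
static double
divisor_for(int parallel_workers)
{
    double      parallel_divisor = parallel_workers;
    double      leader_contribution = 1.0 - (0.3 * parallel_workers);

    if (leader_contribution > 0)
        parallel_divisor += leader_contribution;
    return parallel_divisor;
}

/* Simplified row clamp: round to an integer, but never go below one row. */
static double
clamp_rows(double nrows)
{
    return (nrows <= 1.0) ? 1.0 : rint(nrows);
}

int
main(void)
{
    double      rel_rows = 100000.0;    /* hypothetical base row estimate */

    for (int workers = 1; workers <= 5; workers++)
    {
        double      divisor = divisor_for(workers);
        double      per_worker = clamp_rows(rel_rows / divisor);

        printf("workers=%d  divisor=%.1f  rows per worker=%.0f\n",
               workers, divisor, per_worker);
    }
    return 0;
}
```

With one worker the divisor comes out to 1.7, with two workers 2.4, with three 3.1, and from four workers on it is simply the worker count. At low worker counts each worker is therefore credited with less than 1/N of the rows and CPU cost, since the leader is expected to process the remainder itself.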