WIP: Add pg_plan_advice contrib module.

author Robert Haas <rhaas@postgresql.org>

Tue, 4 Nov 2025 19:45:31 +0000 (14:45 -0500)

committer Robert Haas <rhaas@postgresql.org>

Thu, 6 Nov 2025 16:41:59 +0000 (11:41 -0500)
author Robert Haas <rhaas@postgresql.org>
Tue, 4 Nov 2025 19:45:31 +0000 (14:45 -0500)
committer Robert Haas <rhaas@postgresql.org>
Thu, 6 Nov 2025 16:41:59 +0000 (11:41 -0500)
diff --git a/contrib/Makefile b/contrib/Makefile

index 2f0a88d3f77448d3f6b9f1dda3de3690be747552..dd04c20acd25b6a577e297a99db4f2e36e0c91fc 100644 (file)
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -34,6 +34,7 @@ SUBDIRS = \
                 pg_freespacemap \
                 pg_logicalinspect \
                 pg_overexplain \
+               pg_plan_advice \
                 pg_prewarm      \
                 pg_stat_statements \
                 pg_surgery      \
diff --git a/contrib/meson.build b/contrib/meson.build

index ed30ee7d639f6690d2d848a41adbc6e7c18c9cee..cb718dbdac0bc7f69fcf6cce1300417378643af1 100644 (file)
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -48,6 +48,7 @@ subdir('pgcrypto')
  subdir('pg_freespacemap')
  subdir('pg_logicalinspect')
  subdir('pg_overexplain')
+subdir('pg_plan_advice')
  subdir('pg_prewarm')
  subdir('pgrowlocks')
  subdir('pg_stat_statements')
diff --git a/contrib/pg_plan_advice/.gitignore b/contrib/pg_plan_advice/.gitignore

new file mode 100644 (file)

index 0000000..19a1425
--- /dev/null
+++ b/contrib/pg_plan_advice/.gitignore
@@ -0,0 +1,3 @@
+/pgpa_parser.h
+/pgpa_parser.c
+/pgpa_scanner.c
diff --git a/contrib/pg_plan_advice/Makefile b/contrib/pg_plan_advice/Makefile

new file mode 100644 (file)

index 0000000..d7e06fc
--- /dev/null
+++ b/contrib/pg_plan_advice/Makefile
@@ -0,0 +1,46 @@
+# contrib/pg_plan_advice/Makefile
+
+MODULE_big = pg_plan_advice
+OBJS = \
+       $(WIN32RES) \
+       pg_plan_advice.o \
+       pgpa_ast.o \
+       pgpa_collector.o \
+       pgpa_identifier.o \
+       pgpa_join.o \
+       pgpa_output.o \
+       pgpa_parser.o \
+       pgpa_planner.o \
+       pgpa_scan.o \
+       pgpa_scanner.o \
+       pgpa_trove.o \
+       pgpa_walker.o
+
+EXTENSION = pg_plan_advice
+DATA = pg_plan_advice--1.0.sql
+PGFILEDESC = "pg_plan_advice - help the planner get the right plan"
+
+REGRESS = gather join_order join_strategy partitionwise scan
+TAP_TESTS = 1
+
+EXTRA_CLEAN = pgpa_parser.h pgpa_parser.c pgpa_scanner.c
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_plan_advice
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+# See notes in src/backend/parser/Makefile about the following two rules
+pgpa_parser.h: pgpa_parser.c
+       touch $@
+
+pgpa_parser.c: BISONFLAGS += -d
+
+# Force these dependencies to be known even without dependency info built:
+pgpa_parser.o pgpa_scanner.o: pgpa_parser.h
diff --git a/contrib/pg_plan_advice/README b/contrib/pg_plan_advice/README

new file mode 100644 (file)

index 0000000..4590cd0
--- /dev/null
+++ b/contrib/pg_plan_advice/README
@@ -0,0 +1,275 @@
+contrib/pg_plan_advice/README
+
+Plan Advice
+===========
+
+This module implements a mini-language for "plan advice" that allows for
+control of certain key planner decisions. Goals include (1) enforcing plan
+stability (my previous plan was good and I would like to keep getting a
+similar one) and (2) allowing users to experiment with plans other than
+the one preferred by the optimizer. Non-goals include (1) controlling
+every possible planner decision and (2) forcing consideration of plans
+that the optimizer rejects for reasons other than cost. (There is some
+room for bikeshedding about what exactly this non-goal means: what if
+we skip path generation entirely for a certain case on the theory that
+we know it cannot win on cost? Does that count as a cost-based rejection
+even though no cost was ever computed?)
+
+Generally, plan advice is a series of whitespace-separated advice items,
+each of which applies an advice tag to a list of advice targets. For
+example, "SEQ_SCAN(foo) HASH_JOIN(bar@ss)" contains two items of advice,
+the first of which applies the SEQ_SCAN tag to "foo" and the second of
+which applies the HASH_JOIN tag to "bar@ss". In this simple example, each
+target identifies a single relation; see "Relation Identifiers", below.
+Advice tags can also be applied to groups of relations; for example,
+"HASH_JOIN(baz (bletch quux))" applies the HASH_JOIN tag to the single
+relation identifier "baz" as well as to the 2-item list containing
+"bletch" and "quux".
+
+Critically, this module knows both how to generate plan advice from an
+already-existing plan, and also how to enforce it during future planning
+cycles. Everything it does is intended to be "round-trip safe": if you
+generate advice from a plan and then feed that back into a future planning
+cycle, each piece of advice should be guaranteed to apply to exactly the
+same part of the query from which it was generated without ambiguity or
+guesswork, and it should successfully enforce the same planning decision that
+led to it being generated in the first place. Note that there is no
+intention that these guarantees hold in the presence of intervening DDL;
+e.g. if you change the properties of a function so that a subquery is no
+longer inlined, or if you drop an index named in the plan advice, the advice
+isn't going to work any more. That's expected.
+
+This module aims to force the planner to follow any provided advice without
+regard to whether it appears to be good advice or bad advice.  If the
+user provides bad advice, whether derived from a previously-generated plan
+or manually written, they may get a bad plan. We regard this as user error,
+not a defect in this module. It seems likely that applying advice
+judiciously and only when truly required to avoid problems will be a more
+successful strategy than applying it with a broad brush, but users are free
+to experiment with whatever strategies they think best.
+
+Relation Identifiers
+====================
+
+Uniquely identifying the part of a query to which a certain piece of
+advice applies is harder than it sounds. Our basic approach is to use
+relation aliases as a starting point, and then disambiguate. There are
+three ways that the same relation alias can occur multiple times:
+
+1. It can appear in more than one subquery.
+
+2. It can appear more than once in the same subquery,
+   e.g. (foo JOIN bar) x JOIN foo.
+
+3. The table can be partitioned.
+
+Any combination of these things can occur simultaneously.  Therefore, our
+general syntax for a relation identifier is:
+
+alias_name#occurrence_number/partition_schema.partition_name@plan_name
+
+All components except for the alias_name are optional and included only
+when required. When a component is omitted, the associated punctuation
+must also be omitted. Occurrence numbers are counted ignoring children of
+partitioned tables.  When the generated occurrence number is 1, we omit
+the occurrence number. The partition schema and partition name are included
+only for children of partitioned tables. In generated advice, the
+partition_schema is always included whenever there is a partition_name,
+but user-written advice may mention the name and omit the schema. The
+plan_name is omitted for the top-level PlannerInfo.
+
+Scan Advice
+===========
+
+For many types of scan, no advice is generated or possible; for instance,
+a subquery is always scanned using a subquery scan. While that scan may be
+elided via setrefs processing, this doesn't change the fact that only one
+basic approach exists. Hence, scan advice applies mostly to relations, which
+can be scanned in multiple ways.
+
+We tend to think of a scan as targeting a single relation, and that's
+normally the case, but it doesn't have to be. For instance, if a join is
+proven empty, the whole thing may be replaced with a single Result node
+which, in effect, is a degenerate scan of every relation in the collapsed
+portion of the join tree. Similarly, it's possible to inject a custom scan
+in such a way that it replaces an entire join. If we ever emit advice
+for these cases, it would target sets of relation identifiers surrounded
+by curly braces, e.g. SOME_SORT_OF_SCAN(foo (bar baz)) would mean that the
+given scan type would be used for foo as a single relation and also the
+combination of bar and baz as a join product. We have no such cases at
+present.
+
+For index and index-only scans, both the relation being scanned and the
+index or indexes being used must be specified. For example, INDEX_SCAN(foo
+foo_a_idx bar bar_b_idx) indicates that an index scan (not an index-only
+scan) should be used on foo_a_idx when scanning foo, and that an index scan
+should be used on bar_b_idx when scanning bar.
+
+Bitmap heap scans allow for a more complicated index specification. For
+example, BITMAP_HEAP_SCAN(foo &&(foo_a_idx ||(foo_b_idx foo_c_idx))) says
+that foo should be scanned using a BitmapHeapScan over a BitmapAnd between
+foo_a_idx and the result of a BitmapOr between foo_b_idx and foo_c_idx.
+
+XXX: Currently, BITMAP_HEAP_SCAN does not enforce the index specification,
+because the available hooks are insufficient to do so. It's possible that
+this should be changed to exclude the index specification altogether and
+simply insist that some sort of bitmap heap scan is used; alternatively,
+we need better hooks.
+
+Join Order Advice
+=================
+
+The JOIN_ORDER tag specifies the order in which several tables that are
+part of the same join problem should be joined. Each subquery (except for
+those that are inlined) is a separate join problem. Within a subquery,
+partitionwise joins can create additional, separate join problems. Hence,
+queries involving partitionwise joins may use JOIN_ORDER() many times.
+
+We take the canonical join structure to be an outer-deep tree, so
+JOIN_ORDER(t1 t2 t3) says that t1 is the driving table and should be joined
+first to t2 and then to t3. If the join problem involves additional tables,
+they can be joined in any order after the join between t1, t2, and t3 has
+been constructed. Generated join advice always mentions all tables
+in the join problem, but manually written join advice need not do so.
+
+For trees which are not outer-deep, parentheses can be used. For example,
+JOIN_ORDER(t1 (t2 t3)) says that the top-level join should have t1 on the
+outer side and a join between t2 and t3 on the inner side. That join should
+be constructed so that t2 is on the outer side and t3 is on the inner side.
+
+In some cases, it's not possible to fully specify the join order in this way.
+For example, if t2 and t3 are being scanned by a single custom scan or foreign
+scan, or if a partitionwise join is being performed between those tables, then
+it's impossible to say that t2 is the outer table and t3 is the inner table,
+or the other way around; it's just undefined. In such cases, we generate
+join advice that uses curly braces, intending to indicate a lack of ordering:
+JOIN_ORDER(t1 {t2 t3}) says that the uppermost join should have t1 on the outer
+side and some kind of join between t2 and t3 on the inner side, but without
+saying how that join must be performed or anything about which relation should
+appear on which side of the join, or even whether this kind of join has sides.
+
+Join Strategy Advice
+====================
+
+Tags such as NESTED_LOOP_PLAIN specify the method that should be used to
+perform a certain join. More specifically, NESTED_LOOP_PLAIN(x (y z)) says
+that the plan should put the relation whose identifier is "x" on the inner
+side of a plain nested loop (one without materialization or memoization)
+and that it should also put a join between the relation whose identifier is
+"y" and the relation whose identifier is "z" on the inner side of a nested
+loop. Hence, for an N-table join problem, there will be N-1 pieces of join
+strategy advice; no join strategy advice is required for the outermost
+table in the join problem.
+
+Considering that we have both join order advice and join strategy advice,
+it might seem natural to say that NESTED_LOOP_PLAIN(x) should be redefined
+to mean that x should appear by itself on one side or the other of a nested
+loop, rather than specifically on the inner side, but this definition appears
+useless in practice. It gives the planner too much freedom to do things that
+bear little resemblance to what the user probably had in mind. This makes
+only a limited amount of practical difference in the case of a merge join or
+unparameterized nested loop, but for a parameterized nested loop or a hash
+join, the two sides are treated very differently and saying that a certain
+relation should be involved in one of those operations without saying which
+role it should take isn't saying much.
+
+This choice of definition implies that join strategy advice also imposes some
+join order constraints. For example, given a join between foo and bar,
+HASH_JOIN(bar) implies that foo is the driving table. Otherwise, it would
+be impossible to put bar beneath the inner side of a Hash Join.
+
+Note that, given this definition, it's reasonable to consider deleting the
+join order advice but applying the join strategy advice. For example,
+consider a star schema with tables fact, dim1, dim2, dim3, dim4, and dim5.
+The automatically generated advice might specify JOIN_ORDER(fact dim1 dim3
+dim4 dim2 dim5) HASH_JOIN(dim2 dim4) NESTED_LOOP_PLAIN(dim1 dim3 dim5).
+Deleting the JOIN_ORDER advice allows the planner to reorder the joins
+however it likes while still forcing the same choice of join method. This
+seems potentially useful, and is one reason why a unified syntax that controls
+both join order and join method in a single locution was not chosen.
+
+Advice Completeness
+===================
+
+An essential guiding principle is that no inference may be made on the basis
+of the absence of advice. The user is entitled to remove any portion of the
+generated advice which they deem unsuitable or counterproductive and the
+result should only be to increase the flexibility afforded to the planner.
+This means that if advice can say that a certain optimization or technique
+should be used, it should also be able to say that the optimization or
+technique should not be used. We should never assume that the absence of an
+instruction to do a certain thing means that it should not be done; all
+instructions must be explicit.
+
+Semijoin Uniqueness
+===================
+
+Faced with a semijoin, the planner considers both a direct implementation
+and a plan where one side is made unique and then an inner join is
+performed. We emit SEMIJOIN_UNIQUE() advice when this transformation occurs
+and SEMIJOIN_NON_UNIQUE() advice when it doesn't. These items work like
+join strategy advice: the inner side of the relevant join is named, and the
+chosen join order must be compatible with the advice having some effect.
+
+XXX: Currently, SEMIJOIN_NON_UNIQUE() advice is emitted in some situations
+where the SEMIJOIN_UNIQUE() approach was determined to be non-viable; ideally,
+we should avoid that.
+
+XXX: Right semijoins haven't been properly thought through. The associated
+code probably just doesn't work.
+
+XXX: Semijoin uniqueness advice has no automated tests and needs substantially
+more manual testing.
+
+Partitionwise
+=============
+
+PARTITIONWISE() advice can be used to specify both those partitionwise joins
+which should be performed and those which should not be performed; the idea
+is that each argument to PARTITIONWISE specifies a set of relations that
+should be scanned partitionwise after being joined to each other and nothing
+else. Hence, for example, PARTITIONWISE((t1 t2) t3) specifies that the
+query should contain a partitionwise join between t1 and t2 and that t3
+should not be part of any partitionwise join. If there are no other rels
+in the query, specifying just PARTITIONWISE((t1 t2)) would have the same
+effect, since there would be no other rels to which t3 could be joined in
+a partitionwise fashion.
+
+Parallel Query (Gather, etc.)
+=============================
+
+Each argument to GATHER() or GATHER_MERGE() is a single relation or an
+exact set of relations on top of which a Gather or Gather Merge node,
+respectively, should be placed. Each argument to NO_GATHER() is a single
+relation that should not appear beneath any Gather or Gather Merge node;
+that is, parallelism should not be used.
+
+Implicit Join Order Constraints
+===============================
+
+When JOIN_ORDER() advice is not provided for a particular join problem,
+other pieces of advice may still incidentally constrain the join order.
+For example, a user who specifies HASH_JOIN((foo bar)) is explicitly saying
+that there should be a hash join with exactly foo and bar on the inner
+side of it, but that also implies that foo and bar must be joined to
+each other before either of them is joined to anything else. Otherwise,
+the join the user is attempting to constrain won't actually occur in the
+query, which ends up looking like the system has just decided to ignore
+the advice altogether.
+
+Future Work
+===========
+
+We don't handle choice of aggregation: it would be nice to be able to force
+sorted or grouped aggregation. I'm guessing this can be left to future work.
+
+More seriously, we don't know anything about eager aggregation, which could
+have a large impact on the shape of the plan tree. XXX: This needs some study
+to determine how large a problem it is, and might need to be fixed sooner
+rather than later.
+
+We don't offer any control over estimates, only outcomes. It seems like a
+good idea to incorporate that ability at some future point, as pg_hint_plan
+does. However, since the primary goal of the initial development work is to be
+able to induce the planner to recreate a desired plan that worked well in
+the past, this has not been included in the initial development effort.
diff --git a/contrib/pg_plan_advice/expected/gather.out b/contrib/pg_plan_advice/expected/gather.out

new file mode 100644 (file)

index 0000000..d0224a2
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/gather.out
@@ -0,0 +1,320 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 1;
+SET parallel_setup_cost = 0;
+SET parallel_tuple_cost = 0;
+SET min_parallel_table_scan_size = 0;
+SET debug_parallel_query = off;
+CREATE TABLE gt_dim (id serial primary key, dim text)
+       WITH (autovacuum_enabled = false);
+INSERT INTO gt_dim (dim) SELECT random()::text FROM generate_series(1,100) g;
+VACUUM ANALYZE gt_dim;
+CREATE TABLE gt_fact (
+       id int not null,
+       dim_id integer not null references gt_dim (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO gt_fact
+       SELECT g, (g%3)+1 FROM generate_series(1,100000) g;
+VACUUM ANALYZE gt_fact;
+-- By default, we expect Gather Merge with a parallel hash join.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Gather Merge
+   Workers Planned: 1
+   ->  Sort
+         Sort Key: f.dim_id
+         ->  Parallel Hash Join
+               Hash Cond: (f.dim_id = d.id)
+               ->  Parallel Seq Scan on gt_fact f
+               ->  Parallel Hash
+                     ->  Parallel Seq Scan on gt_dim d
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   HASH_JOIN(d)
+   SEQ_SCAN(f d)
+   GATHER_MERGE((f d))
+(14 rows)
+
+-- Force Gather or Gather Merge of both relations together.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Gather Merge
+   Workers Planned: 1
+   ->  Sort
+         Sort Key: f.dim_id
+         ->  Parallel Hash Join
+               Hash Cond: (f.dim_id = d.id)
+               ->  Parallel Seq Scan on gt_fact f
+               ->  Parallel Hash
+                     ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER_MERGE((f d)) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   HASH_JOIN(d)
+   SEQ_SCAN(f d)
+   GATHER_MERGE((f d))
+(16 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Sort
+   Sort Key: f.dim_id
+   ->  Gather
+         Workers Planned: 1
+         ->  Parallel Hash Join
+               Hash Cond: (f.dim_id = d.id)
+               ->  Parallel Seq Scan on gt_fact f
+               ->  Parallel Hash
+                     ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER((f d)) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   HASH_JOIN(d)
+   SEQ_SCAN(f d)
+   GATHER((f d))
+(16 rows)
+
+COMMIT;
+-- Force a separate Gather or Gather Merge operation for each relation.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Merge Join
+   Merge Cond: (f.dim_id = d.id)
+   ->  Gather Merge
+         Workers Planned: 1
+         ->  Sort
+               Sort Key: f.dim_id
+               ->  Parallel Seq Scan on gt_fact f
+   ->  Gather Merge
+         Workers Planned: 1
+         ->  Sort
+               Sort Key: d.id
+               ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER_MERGE(f) /* matched */
+   GATHER_MERGE(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   MERGE_JOIN_PLAIN(d)
+   SEQ_SCAN(f d)
+   GATHER_MERGE(f d)
+(20 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Merge Join
+   Merge Cond: (f.dim_id = d.id)
+   ->  Sort
+         Sort Key: f.dim_id
+         ->  Gather
+               Workers Planned: 1
+               ->  Parallel Seq Scan on gt_fact f
+   ->  Sort
+         Sort Key: d.id
+         ->  Gather
+               Workers Planned: 1
+               ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER(f) /* matched */
+   GATHER(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   MERGE_JOIN_PLAIN(d)
+   SEQ_SCAN(f d)
+   GATHER(f d)
+(20 rows)
+
+COMMIT;
+-- Force a Gather or Gather Merge on one relation but no parallelism on other.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(f) no_gather(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Merge Join
+   Merge Cond: (f.dim_id = d.id)
+   ->  Gather Merge
+         Workers Planned: 1
+         ->  Sort
+               Sort Key: f.dim_id
+               ->  Parallel Seq Scan on gt_fact f
+   ->  Index Scan using gt_dim_pkey on gt_dim d
+ Supplied Plan Advice:
+   GATHER_MERGE(f) /* matched */
+   NO_GATHER(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   MERGE_JOIN_PLAIN(d)
+   SEQ_SCAN(f)
+   INDEX_SCAN(d public.gt_dim_pkey)
+   GATHER_MERGE(f)
+   NO_GATHER(d)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather_merge(d) no_gather(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Merge Join
+   Merge Cond: (f.dim_id = d.id)
+   ->  Sort
+         Sort Key: f.dim_id
+         ->  Seq Scan on gt_fact f
+   ->  Gather Merge
+         Workers Planned: 1
+         ->  Sort
+               Sort Key: d.id
+               ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER_MERGE(d) /* matched */
+   NO_GATHER(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   MERGE_JOIN_PLAIN(d)
+   SEQ_SCAN(f d)
+   GATHER_MERGE(d)
+   NO_GATHER(f)
+(19 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather(f) no_gather(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Merge Join
+   Merge Cond: (d.id = f.dim_id)
+   ->  Index Scan using gt_dim_pkey on gt_dim d
+   ->  Sort
+         Sort Key: f.dim_id
+         ->  Gather
+               Workers Planned: 1
+               ->  Parallel Seq Scan on gt_fact f
+ Supplied Plan Advice:
+   GATHER(f) /* matched */
+   NO_GATHER(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   MERGE_JOIN_PLAIN(f)
+   SEQ_SCAN(f)
+   INDEX_SCAN(d public.gt_dim_pkey)
+   GATHER(f)
+   NO_GATHER(d)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather(d) no_gather(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Merge Join
+   Merge Cond: (f.dim_id = d.id)
+   ->  Sort
+         Sort Key: f.dim_id
+         ->  Seq Scan on gt_fact f
+   ->  Sort
+         Sort Key: d.id
+         ->  Gather
+               Workers Planned: 1
+               ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER(d) /* matched */
+   NO_GATHER(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   MERGE_JOIN_PLAIN(d)
+   SEQ_SCAN(f d)
+   GATHER(d)
+   NO_GATHER(f)
+(19 rows)
+
+COMMIT;
+-- Force no Gather or Gather Merge use at all.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'no_gather(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                   QUERY PLAN                   
+------------------------------------------------
+ Merge Join
+   Merge Cond: (d.id = f.dim_id)
+   ->  Index Scan using gt_dim_pkey on gt_dim d
+   ->  Sort
+         Sort Key: f.dim_id
+         ->  Seq Scan on gt_fact f
+ Supplied Plan Advice:
+   NO_GATHER(f) /* matched */
+   NO_GATHER(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   MERGE_JOIN_PLAIN(f)
+   SEQ_SCAN(f)
+   INDEX_SCAN(d public.gt_dim_pkey)
+   NO_GATHER(f d)
+(15 rows)
+
+COMMIT;
+-- Can't force Gather Merge without the ORDER BY clause, but just Gather is OK.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Gather
+   Disabled: true
+   Workers Planned: 1
+   ->  Parallel Hash Join
+         Hash Cond: (f.dim_id = d.id)
+         ->  Parallel Seq Scan on gt_fact f
+         ->  Parallel Hash
+               ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER_MERGE((f d)) /* matched, failed */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   HASH_JOIN(d)
+   SEQ_SCAN(f d)
+   GATHER((f d))
+(15 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Gather
+   Workers Planned: 1
+   ->  Parallel Hash Join
+         Hash Cond: (f.dim_id = d.id)
+         ->  Parallel Seq Scan on gt_fact f
+         ->  Parallel Hash
+               ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER((f d)) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   HASH_JOIN(d)
+   SEQ_SCAN(f d)
+   GATHER((f d))
+(14 rows)
+
+COMMIT;
diff --git a/contrib/pg_plan_advice/expected/join_order.out b/contrib/pg_plan_advice/expected/join_order.out

new file mode 100644 (file)

index 0000000..e876523
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/join_order.out
@@ -0,0 +1,292 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+CREATE TABLE jo_dim1 (id integer primary key, dim1 text, val1 int)
+       WITH (autovacuum_enabled = false);
+INSERT INTO jo_dim1 (id, dim1, val1)
+       SELECT g, 'some filler text ' || g, (g % 3) + 1
+         FROM generate_series(1,100) g;
+VACUUM ANALYZE jo_dim1;
+CREATE TABLE jo_dim2 (id integer primary key, dim2 text, val2 int)
+       WITH (autovacuum_enabled = false);
+INSERT INTO jo_dim2 (id, dim2, val2)
+       SELECT g, 'some filler text ' || g, (g % 7) + 1
+         FROM generate_series(1,1000) g;
+VACUUM ANALYZE jo_dim2;
+CREATE TABLE jo_fact (
+       id int primary key,
+       dim1_id integer not null references jo_dim1 (id),
+       dim2_id integer not null references jo_dim2 (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO jo_fact
+       SELECT g, (g%100)+1, (g%100)+1 FROM generate_series(1,100000) g;
+VACUUM ANALYZE jo_fact;
+-- We expect to join to d2 first and then d1, since the condition on d2
+-- is more selective.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+                QUERY PLAN                
+------------------------------------------
+ Hash Join
+   Hash Cond: (f.dim1_id = d1.id)
+   ->  Hash Join
+         Hash Cond: (f.dim2_id = d2.id)
+         ->  Seq Scan on jo_fact f
+         ->  Hash
+               ->  Seq Scan on jo_dim2 d2
+                     Filter: (val2 = 1)
+   ->  Hash
+         ->  Seq Scan on jo_dim1 d1
+               Filter: (val1 = 1)
+ Generated Plan Advice:
+   JOIN_ORDER(f d2 d1)
+   HASH_JOIN(d2 d1)
+   SEQ_SCAN(f d2 d1)
+   NO_GATHER(f d1 d2)
+(16 rows)
+
+-- Force a few different join orders. Some of these are very inefficient,
+-- but the planner considers them all viable.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+                QUERY PLAN                
+------------------------------------------
+ Hash Join
+   Hash Cond: (f.dim2_id = d2.id)
+   ->  Hash Join
+         Hash Cond: (f.dim1_id = d1.id)
+         ->  Seq Scan on jo_fact f
+         ->  Hash
+               ->  Seq Scan on jo_dim1 d1
+                     Filter: (val1 = 1)
+   ->  Hash
+         ->  Seq Scan on jo_dim2 d2
+               Filter: (val2 = 1)
+ Supplied Plan Advice:
+   JOIN_ORDER(f d1 d2) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d1 d2)
+   HASH_JOIN(d1 d2)
+   SEQ_SCAN(f d1 d2)
+   NO_GATHER(f d1 d2)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+                QUERY PLAN                
+------------------------------------------
+ Hash Join
+   Hash Cond: (f.dim1_id = d1.id)
+   ->  Hash Join
+         Hash Cond: (f.dim2_id = d2.id)
+         ->  Seq Scan on jo_fact f
+         ->  Hash
+               ->  Seq Scan on jo_dim2 d2
+                     Filter: (val2 = 1)
+   ->  Hash
+         ->  Seq Scan on jo_dim1 d1
+               Filter: (val1 = 1)
+ Supplied Plan Advice:
+   JOIN_ORDER(f d2 d1) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d2 d1)
+   HASH_JOIN(d2 d1)
+   SEQ_SCAN(f d2 d1)
+   NO_GATHER(f d1 d2)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(d1 f d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+               QUERY PLAN                
+-----------------------------------------
+ Hash Join
+   Hash Cond: (f.dim2_id = d2.id)
+   ->  Hash Join
+         Hash Cond: (d1.id = f.dim1_id)
+         ->  Seq Scan on jo_dim1 d1
+               Filter: (val1 = 1)
+         ->  Hash
+               ->  Seq Scan on jo_fact f
+   ->  Hash
+         ->  Seq Scan on jo_dim2 d2
+               Filter: (val2 = 1)
+ Supplied Plan Advice:
+   JOIN_ORDER(d1 f d2) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d1 f d2)
+   HASH_JOIN(f d2)
+   SEQ_SCAN(d1 f d2)
+   NO_GATHER(f d1 d2)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(f (d1 d2))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((f.dim1_id = d1.id) AND (f.dim2_id = d2.id))
+   ->  Seq Scan on jo_fact f
+   ->  Hash
+         ->  Nested Loop
+               ->  Seq Scan on jo_dim1 d1
+                     Filter: (val1 = 1)
+               ->  Materialize
+                     ->  Seq Scan on jo_dim2 d2
+                           Filter: (val2 = 1)
+ Supplied Plan Advice:
+   JOIN_ORDER(f (d1 d2)) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f (d1 d2))
+   NESTED_LOOP_MATERIALIZE(d2)
+   HASH_JOIN(d2)
+   SEQ_SCAN(f d1 d2)
+   NO_GATHER(f d1 d2)
+(18 rows)
+
+COMMIT;
+-- The unusual formulation of this query is intended to prevent the query
+-- planner from reducing the FULL JOIN to some other join type, so that we
+-- can test what happens with a join type that cannot be reordered.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Nested Loop
+   Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
+   ->  Merge Full Join
+         Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0)))
+         ->  Sort
+               Sort Key: ((d2.id + 0))
+               ->  Seq Scan on jo_dim2 d2
+         ->  Sort
+               Sort Key: ((f.dim2_id + 0))
+               ->  Seq Scan on jo_fact f
+   ->  Materialize
+         ->  Seq Scan on jo_dim1 d1
+ Generated Plan Advice:
+   JOIN_ORDER(d2 f d1)
+   MERGE_JOIN_PLAIN(f)
+   NESTED_LOOP_MATERIALIZE(d1)
+   SEQ_SCAN(d2 f d1)
+   NO_GATHER(d1 f d2)
+(18 rows)
+
+-- We should not be able to force the planner to join f to d1 first, because
+-- that is not a valid join order, but we should be able to force the planner
+-- to make either d2 or f the driving table.
+BEGIN;
+-- XXX: The advice feedback says 'partially matched' here which isn't exactly
+-- wrong given the way that flag is handled in the code, but it's at the very
+-- least confusing. Something should probably be improved here.
+SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Nested Loop
+   Disabled: true
+   Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
+   ->  Merge Full Join
+         Disabled: true
+         Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0)))
+         ->  Sort
+               Sort Key: ((d2.id + 0))
+               ->  Seq Scan on jo_dim2 d2
+         ->  Sort
+               Sort Key: ((f.dim2_id + 0))
+               ->  Seq Scan on jo_fact f
+   ->  Seq Scan on jo_dim1 d1
+ Supplied Plan Advice:
+   JOIN_ORDER(f d1 d2) /* partially matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d2 f d1)
+   MERGE_JOIN_PLAIN(f)
+   NESTED_LOOP_PLAIN(d1)
+   SEQ_SCAN(d2 f d1)
+   NO_GATHER(d1 f d2)
+(21 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Nested Loop
+   Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
+   ->  Merge Full Join
+         Merge Cond: (((f.dim2_id + 0)) = ((d2.id + 0)))
+         ->  Sort
+               Sort Key: ((f.dim2_id + 0))
+               ->  Seq Scan on jo_fact f
+         ->  Sort
+               Sort Key: ((d2.id + 0))
+               ->  Seq Scan on jo_dim2 d2
+   ->  Materialize
+         ->  Seq Scan on jo_dim1 d1
+ Supplied Plan Advice:
+   JOIN_ORDER(f d2 d1) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d2 d1)
+   MERGE_JOIN_PLAIN(d2)
+   NESTED_LOOP_MATERIALIZE(d1)
+   SEQ_SCAN(f d2 d1)
+   NO_GATHER(d1 f d2)
+(20 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(d2 f d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Nested Loop
+   Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
+   ->  Merge Full Join
+         Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0)))
+         ->  Sort
+               Sort Key: ((d2.id + 0))
+               ->  Seq Scan on jo_dim2 d2
+         ->  Sort
+               Sort Key: ((f.dim2_id + 0))
+               ->  Seq Scan on jo_fact f
+   ->  Materialize
+         ->  Seq Scan on jo_dim1 d1
+ Supplied Plan Advice:
+   JOIN_ORDER(d2 f d1) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d2 f d1)
+   MERGE_JOIN_PLAIN(f)
+   NESTED_LOOP_MATERIALIZE(d1)
+   SEQ_SCAN(d2 f d1)
+   NO_GATHER(d1 f d2)
+(20 rows)
+
+COMMIT;
+-- XXX: add tests for join order prefix matching
+-- XXX: join_order(justonerel) shouldn't report partially matched
diff --git a/contrib/pg_plan_advice/expected/join_strategy.out b/contrib/pg_plan_advice/expected/join_strategy.out

new file mode 100644 (file)

index 0000000..71ee26a
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/join_strategy.out
@@ -0,0 +1,297 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+CREATE TABLE join_dim (id serial primary key, dim text)
+       WITH (autovacuum_enabled = false);
+INSERT INTO join_dim (dim) SELECT random()::text FROM generate_series(1,100) g;
+VACUUM ANALYZE join_dim;
+CREATE TABLE join_fact (
+       id int primary key,
+       dim_id integer not null references join_dim (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO join_fact
+       SELECT g, (g%3)+1 FROM generate_series(1,100000) g;
+CREATE INDEX join_fact_dim_id ON join_fact (dim_id);
+VACUUM ANALYZE join_fact;
+-- We expect a hash join by default.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+             QUERY PLAN             
+------------------------------------
+ Hash Join
+   Hash Cond: (f.dim_id = d.id)
+   ->  Seq Scan on join_fact f
+   ->  Hash
+         ->  Seq Scan on join_dim d
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   HASH_JOIN(d)
+   SEQ_SCAN(f d)
+   NO_GATHER(f d)
+(10 rows)
+
+-- Try forcing each join method in turn with join_dim as the inner table.
+-- All of these should work except for MERGE_JOIN_MATERIALIZE; that will
+-- fail, because the planner knows that join_dim (id) is unique, and will
+-- refuse to add mark/restore overhead.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+             QUERY PLAN             
+------------------------------------
+ Hash Join
+   Hash Cond: (f.dim_id = d.id)
+   ->  Seq Scan on join_fact f
+   ->  Hash
+         ->  Seq Scan on join_dim d
+ Supplied Plan Advice:
+   HASH_JOIN(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   HASH_JOIN(d)
+   SEQ_SCAN(f d)
+   NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Merge Join
+   Disabled: true
+   Merge Cond: (f.dim_id = d.id)
+   ->  Index Scan using join_fact_dim_id on join_fact f
+   ->  Index Scan using join_dim_pkey on join_dim d
+ Supplied Plan Advice:
+   MERGE_JOIN_MATERIALIZE(d) /* matched, failed */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   MERGE_JOIN_PLAIN(d)
+   INDEX_SCAN(f public.join_fact_dim_id d public.join_dim_pkey)
+   NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Merge Join
+   Merge Cond: (f.dim_id = d.id)
+   ->  Index Scan using join_fact_dim_id on join_fact f
+   ->  Index Scan using join_dim_pkey on join_dim d
+ Supplied Plan Advice:
+   MERGE_JOIN_PLAIN(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   MERGE_JOIN_PLAIN(d)
+   INDEX_SCAN(f public.join_fact_dim_id d public.join_dim_pkey)
+   NO_GATHER(f d)
+(11 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                 QUERY PLAN                 
+--------------------------------------------
+ Nested Loop
+   Join Filter: (f.dim_id = d.id)
+   ->  Seq Scan on join_fact f
+   ->  Materialize
+         ->  Seq Scan on join_dim d
+ Supplied Plan Advice:
+   NESTED_LOOP_MATERIALIZE(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   NESTED_LOOP_MATERIALIZE(d)
+   SEQ_SCAN(f d)
+   NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on join_fact f
+   ->  Memoize
+         Cache Key: f.dim_id
+         Cache Mode: logical
+         ->  Index Scan using join_dim_pkey on join_dim d
+               Index Cond: (id = f.dim_id)
+ Supplied Plan Advice:
+   NESTED_LOOP_MEMOIZE(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   NESTED_LOOP_MEMOIZE(d)
+   SEQ_SCAN(f)
+   INDEX_SCAN(d public.join_dim_pkey)
+   NO_GATHER(f d)
+(15 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on join_fact f
+   ->  Index Scan using join_dim_pkey on join_dim d
+         Index Cond: (id = f.dim_id)
+ Supplied Plan Advice:
+   NESTED_LOOP_PLAIN(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   NESTED_LOOP_PLAIN(d)
+   SEQ_SCAN(f)
+   INDEX_SCAN(d public.join_dim_pkey)
+   NO_GATHER(f d)
+(12 rows)
+
+COMMIT;
+-- Now try forcing each join method in turn with join_fact as the inner
+-- table. All of these should work.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+             QUERY PLAN              
+-------------------------------------
+ Hash Join
+   Hash Cond: (d.id = f.dim_id)
+   ->  Seq Scan on join_dim d
+   ->  Hash
+         ->  Seq Scan on join_fact f
+ Supplied Plan Advice:
+   HASH_JOIN(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   HASH_JOIN(f)
+   SEQ_SCAN(d f)
+   NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Merge Join
+   Merge Cond: (d.id = f.dim_id)
+   ->  Index Scan using join_dim_pkey on join_dim d
+   ->  Materialize
+         ->  Index Scan using join_fact_dim_id on join_fact f
+ Supplied Plan Advice:
+   MERGE_JOIN_MATERIALIZE(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   MERGE_JOIN_MATERIALIZE(f)
+   INDEX_SCAN(d public.join_dim_pkey f public.join_fact_dim_id)
+   NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Merge Join
+   Merge Cond: (d.id = f.dim_id)
+   ->  Index Scan using join_dim_pkey on join_dim d
+   ->  Index Scan using join_fact_dim_id on join_fact f
+ Supplied Plan Advice:
+   MERGE_JOIN_PLAIN(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   MERGE_JOIN_PLAIN(f)
+   INDEX_SCAN(d public.join_dim_pkey f public.join_fact_dim_id)
+   NO_GATHER(f d)
+(11 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                 QUERY PLAN                 
+--------------------------------------------
+ Nested Loop
+   Join Filter: (f.dim_id = d.id)
+   ->  Seq Scan on join_dim d
+   ->  Materialize
+         ->  Seq Scan on join_fact f
+ Supplied Plan Advice:
+   NESTED_LOOP_MATERIALIZE(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   NESTED_LOOP_MATERIALIZE(f)
+   SEQ_SCAN(d f)
+   NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on join_dim d
+   ->  Memoize
+         Cache Key: d.id
+         Cache Mode: logical
+         ->  Index Scan using join_fact_dim_id on join_fact f
+               Index Cond: (dim_id = d.id)
+ Supplied Plan Advice:
+   NESTED_LOOP_MEMOIZE(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   NESTED_LOOP_MEMOIZE(f)
+   SEQ_SCAN(d)
+   INDEX_SCAN(f public.join_fact_dim_id)
+   NO_GATHER(f d)
+(15 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on join_dim d
+   ->  Index Scan using join_fact_dim_id on join_fact f
+         Index Cond: (dim_id = d.id)
+ Supplied Plan Advice:
+   NESTED_LOOP_PLAIN(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   NESTED_LOOP_PLAIN(f)
+   SEQ_SCAN(d)
+   INDEX_SCAN(f public.join_fact_dim_id)
+   NO_GATHER(f d)
+(12 rows)
+
+COMMIT;
+-- We can't force a foreign join between these tables, because they
+-- aren't foreign tables.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'FOREIGN_JOIN((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Nested Loop
+   Disabled: true
+   ->  Seq Scan on join_fact f
+   ->  Index Scan using join_dim_pkey on join_dim d
+         Index Cond: (id = f.dim_id)
+ Supplied Plan Advice:
+   FOREIGN_JOIN((f d)) /* matched, failed */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   NESTED_LOOP_PLAIN(d)
+   SEQ_SCAN(f)
+   INDEX_SCAN(d public.join_dim_pkey)
+   NO_GATHER(f d)
+(13 rows)
+
+COMMIT;
diff --git a/contrib/pg_plan_advice/expected/local_collector.out b/contrib/pg_plan_advice/expected/local_collector.out

new file mode 100644 (file)

index 0000000..ac5aecd
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/local_collector.out
@@ -0,0 +1,64 @@
+CREATE EXTENSION pg_plan_advice;
+-- Try clearing advice before we've collected any.
+SELECT pg_clear_collected_local_advice();
+ pg_clear_collected_local_advice 
+---------------------------------
+ 
+(1 row)
+
+-- Set a small advice collection limit so that we'll exceed it.
+SET pg_plan_advice.local_collection_limit = 2;
+-- Set up a dummy table.
+CREATE TABLE dummy_table (a int primary key, b text)
+       WITH (autovacuum_enabled = false, parallel_workers = 0);
+-- Test queries.
+SELECT * FROM dummy_table a, dummy_table b;
+ a | b | a | b 
+---+---+---+---
+(0 rows)
+
+SELECT * FROM dummy_table;
+ a | b 
+---+---
+(0 rows)
+
+-- Should return the advice from the second test query.
+SELECT advice FROM pg_get_collected_local_advice() ORDER BY id LIMIT 1;
+         advice         
+------------------------
+ SEQ_SCAN(dummy_table) +
+ NO_GATHER(dummy_table)
+(1 row)
+
+-- Now try clearing advice again.
+SELECT pg_clear_collected_local_advice();
+ pg_clear_collected_local_advice 
+---------------------------------
+ 
+(1 row)
+
+-- Raise the collection limit so that the collector uses multiple chunks.
+SET pg_plan_advice.local_collection_limit = 2000;
+-- Push a bunch of queries through the collector.
+DO $$
+BEGIN
+       FOR x IN 1..2000 LOOP
+               EXECUTE 'SELECT * FROM dummy_table';
+       END LOOP;
+END
+$$;
+-- Check that the collector worked.
+SELECT COUNT(*) FROM pg_get_collected_local_advice();
+ count 
+-------
+  2000
+(1 row)
+
+-- And clear one more time, to verify that this doesn't cause a problem
+-- even with a larger number of entries.
+SELECT pg_clear_collected_local_advice();
+ pg_clear_collected_local_advice 
+---------------------------------
+ 
+(1 row)
+
diff --git a/contrib/pg_plan_advice/expected/partitionwise.out b/contrib/pg_plan_advice/expected/partitionwise.out

new file mode 100644 (file)

index 0000000..df0f055
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/partitionwise.out
@@ -0,0 +1,243 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+SET enable_partitionwise_join = true;
+CREATE TABLE pt1 (id integer primary key, dim1 text, val1 int)
+       PARTITION BY RANGE (id);
+CREATE TABLE pt1a PARTITION OF pt1 FOR VALUES FROM (1) to (1001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt1b PARTITION OF pt1 FOR VALUES FROM (1001) to (2001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt1c PARTITION OF pt1 FOR VALUES FROM (2001) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO pt1 (id, dim1, val1)
+       SELECT g, 'some filler text ' || g, (g % 3) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE pt1;
+CREATE TABLE pt2 (id integer primary key, dim2 text, val2 int)
+       PARTITION BY RANGE (id);
+CREATE TABLE pt2a PARTITION OF pt2 FOR VALUES FROM (1) to (1001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt2b PARTITION OF pt2 FOR VALUES FROM (1001) to (2001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt2c PARTITION OF pt2 FOR VALUES FROM (2001) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO pt2 (id, dim2, val2)
+       SELECT g, 'some other text ' || g, (g % 5) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE pt2;
+CREATE TABLE pt3 (id integer primary key, dim3 text, val3 int)
+       PARTITION BY RANGE (id);
+CREATE TABLE pt3a PARTITION OF pt3 FOR VALUES FROM (1) to (1001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt3b PARTITION OF pt3 FOR VALUES FROM (1001) to (2001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt3c PARTITION OF pt3 FOR VALUES FROM (2001) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO pt3 (id, dim3, val3)
+       SELECT g, 'a third random text ' || g, (g % 7) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE pt3;
+CREATE TABLE ptmismatch (id integer primary key, dimm text, valm int)
+       PARTITION BY RANGE (id);
+CREATE TABLE ptmismatcha PARTITION OF ptmismatch
+    FOR VALUES FROM (1) to (1501)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE ptmismatchb PARTITION OF ptmismatch
+    FOR VALUES FROM (1501) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO ptmismatch (id, dimm, valm)
+       SELECT g, 'yet another text ' || g, (g % 2) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE ptmismatch;
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+   AND val1 = 1 AND val2 = 1 AND val3 = 1;
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
+ Append
+   ->  Nested Loop
+         ->  Hash Join
+               Hash Cond: (pt2_1.id = pt3_1.id)
+               ->  Seq Scan on pt2a pt2_1
+                     Filter: (val2 = 1)
+               ->  Hash
+                     ->  Seq Scan on pt3a pt3_1
+                           Filter: (val3 = 1)
+         ->  Index Scan using pt1a_pkey on pt1a pt1_1
+               Index Cond: (id = pt2_1.id)
+               Filter: (val1 = 1)
+   ->  Nested Loop
+         ->  Hash Join
+               Hash Cond: (pt2_2.id = pt3_2.id)
+               ->  Seq Scan on pt2b pt2_2
+                     Filter: (val2 = 1)
+               ->  Hash
+                     ->  Seq Scan on pt3b pt3_2
+                           Filter: (val3 = 1)
+         ->  Index Scan using pt1b_pkey on pt1b pt1_2
+               Index Cond: (id = pt2_2.id)
+               Filter: (val1 = 1)
+   ->  Nested Loop
+         ->  Hash Join
+               Hash Cond: (pt2_3.id = pt3_3.id)
+               ->  Seq Scan on pt2c pt2_3
+                     Filter: (val2 = 1)
+               ->  Hash
+                     ->  Seq Scan on pt3c pt3_3
+                           Filter: (val3 = 1)
+         ->  Index Scan using pt1c_pkey on pt1c pt1_3
+               Index Cond: (id = pt2_3.id)
+               Filter: (val1 = 1)
+ Generated Plan Advice:
+   JOIN_ORDER(pt2/public.pt2a pt3/public.pt3a pt1/public.pt1a)
+   JOIN_ORDER(pt2/public.pt2b pt3/public.pt3b pt1/public.pt1b)
+   JOIN_ORDER(pt2/public.pt2c pt3/public.pt3c pt1/public.pt1c)
+   NESTED_LOOP_PLAIN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c)
+   HASH_JOIN(pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+   SEQ_SCAN(pt2/public.pt2a pt3/public.pt3a pt2/public.pt2b pt3/public.pt3b
+    pt2/public.pt2c pt3/public.pt3c)
+   INDEX_SCAN(pt1/public.pt1a public.pt1a_pkey pt1/public.pt1b public.pt1b_pkey
+    pt1/public.pt1c public.pt1c_pkey)
+   PARTITIONWISE((pt1 pt2 pt3))
+   NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a
+    pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+(47 rows)
+
+-- Suppress partitionwise join, or do it just partially.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE(pt1 pt2 pt3)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+   AND val1 = 1 AND val2 = 1 AND val3 = 1;
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (pt1.id = pt2.id)
+   ->  Append
+         ->  Seq Scan on pt1a pt1_1
+               Filter: (val1 = 1)
+         ->  Seq Scan on pt1b pt1_2
+               Filter: (val1 = 1)
+         ->  Seq Scan on pt1c pt1_3
+               Filter: (val1 = 1)
+   ->  Hash
+         ->  Hash Join
+               Hash Cond: (pt2.id = pt3.id)
+               ->  Append
+                     ->  Seq Scan on pt2a pt2_1
+                           Filter: (val2 = 1)
+                     ->  Seq Scan on pt2b pt2_2
+                           Filter: (val2 = 1)
+                     ->  Seq Scan on pt2c pt2_3
+                           Filter: (val2 = 1)
+               ->  Hash
+                     ->  Append
+                           ->  Seq Scan on pt3a pt3_1
+                                 Filter: (val3 = 1)
+                           ->  Seq Scan on pt3b pt3_2
+                                 Filter: (val3 = 1)
+                           ->  Seq Scan on pt3c pt3_3
+                                 Filter: (val3 = 1)
+ Supplied Plan Advice:
+   PARTITIONWISE(pt1) /* matched */
+   PARTITIONWISE(pt2) /* matched */
+   PARTITIONWISE(pt3) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(pt1 (pt2 pt3))
+   HASH_JOIN(pt3 pt3)
+   SEQ_SCAN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a
+    pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b
+    pt3/public.pt3c)
+   PARTITIONWISE(pt1 pt2 pt3)
+   NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a
+    pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+(40 rows)
+
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 pt2) pt3)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+   AND val1 = 1 AND val2 = 1 AND val3 = 1;
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (pt1.id = pt3.id)
+   ->  Append
+         ->  Hash Join
+               Hash Cond: (pt1_1.id = pt2_1.id)
+               ->  Seq Scan on pt1a pt1_1
+                     Filter: (val1 = 1)
+               ->  Hash
+                     ->  Seq Scan on pt2a pt2_1
+                           Filter: (val2 = 1)
+         ->  Hash Join
+               Hash Cond: (pt1_2.id = pt2_2.id)
+               ->  Seq Scan on pt1b pt1_2
+                     Filter: (val1 = 1)
+               ->  Hash
+                     ->  Seq Scan on pt2b pt2_2
+                           Filter: (val2 = 1)
+         ->  Hash Join
+               Hash Cond: (pt1_3.id = pt2_3.id)
+               ->  Seq Scan on pt1c pt1_3
+                     Filter: (val1 = 1)
+               ->  Hash
+                     ->  Seq Scan on pt2c pt2_3
+                           Filter: (val2 = 1)
+   ->  Hash
+         ->  Append
+               ->  Seq Scan on pt3a pt3_1
+                     Filter: (val3 = 1)
+               ->  Seq Scan on pt3b pt3_2
+                     Filter: (val3 = 1)
+               ->  Seq Scan on pt3c pt3_3
+                     Filter: (val3 = 1)
+ Supplied Plan Advice:
+   PARTITIONWISE((pt1 pt2)) /* matched */
+   PARTITIONWISE(pt3) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(pt1/public.pt1a pt2/public.pt2a)
+   JOIN_ORDER(pt1/public.pt1b pt2/public.pt2b)
+   JOIN_ORDER(pt1/public.pt1c pt2/public.pt2c)
+   JOIN_ORDER({pt1 pt2} pt3)
+   HASH_JOIN(pt2/public.pt2a pt2/public.pt2b pt2/public.pt2c pt3)
+   SEQ_SCAN(pt1/public.pt1a pt2/public.pt2a pt1/public.pt1b pt2/public.pt2b
+    pt1/public.pt1c pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b
+    pt3/public.pt3c)
+   PARTITIONWISE((pt1 pt2) pt3)
+   NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a
+    pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+(47 rows)
+
+COMMIT;
+-- Can't force a partitionwise join with a mismatched table.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 ptmismatch))';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, ptmismatch WHERE pt1.id = ptmismatch.id;
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Nested Loop
+   Disabled: true
+   ->  Append
+         ->  Seq Scan on pt1a pt1_1
+         ->  Seq Scan on pt1b pt1_2
+         ->  Seq Scan on pt1c pt1_3
+   ->  Append
+         ->  Index Scan using ptmismatcha_pkey on ptmismatcha ptmismatch_1
+               Index Cond: (id = pt1.id)
+         ->  Index Scan using ptmismatchb_pkey on ptmismatchb ptmismatch_2
+               Index Cond: (id = pt1.id)
+ Supplied Plan Advice:
+   PARTITIONWISE((pt1 ptmismatch)) /* matched, failed */
+ Generated Plan Advice:
+   JOIN_ORDER(pt1 ptmismatch)
+   NESTED_LOOP_PLAIN(ptmismatch)
+   SEQ_SCAN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c)
+   INDEX_SCAN(ptmismatch/public.ptmismatcha public.ptmismatcha_pkey
+    ptmismatch/public.ptmismatchb public.ptmismatchb_pkey)
+   PARTITIONWISE(pt1 ptmismatch)
+   NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c
+    ptmismatch/public.ptmismatcha ptmismatch/public.ptmismatchb)
+(22 rows)
+
+COMMIT;
diff --git a/contrib/pg_plan_advice/expected/scan.out b/contrib/pg_plan_advice/expected/scan.out

new file mode 100644 (file)

index 0000000..61f361f
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/scan.out
@@ -0,0 +1,757 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+SET seq_page_cost = 0.1;
+SET random_page_cost = 0.1;
+SET cpu_tuple_cost = 0;
+SET cpu_index_tuple_cost = 0;
+CREATE TABLE scan_table (a int primary key, b text)
+       WITH (autovacuum_enabled = false);
+INSERT INTO scan_table
+       SELECT g, 'some text ' || g FROM generate_series(1, 100000) g;
+CREATE INDEX scan_table_b ON scan_table USING brin (b);
+VACUUM ANALYZE scan_table;
+-- Sequential scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+       QUERY PLAN        
+-------------------------
+ Seq Scan on scan_table
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(4 rows)
+
+-- Index scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(5 rows)
+
+-- Index-only scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+                      QUERY PLAN                      
+------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Generated Plan Advice:
+   INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(5 rows)
+
+-- Bitmap heap scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE b > 'some text 8';
+                     QUERY PLAN                     
+----------------------------------------------------
+ Bitmap Heap Scan on scan_table
+   Recheck Cond: (b > 'some text 8'::text)
+   ->  Bitmap Index Scan on scan_table_b
+         Index Cond: (b > 'some text 8'::text)
+ Generated Plan Advice:
+   BITMAP_HEAP_SCAN(scan_table public.scan_table_b)
+   NO_GATHER(scan_table)
+(7 rows)
+
+-- TID scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+            QUERY PLAN             
+-----------------------------------
+ Tid Scan on scan_table
+   TID Cond: (ctid = '(0,1)'::tid)
+ Generated Plan Advice:
+   TID_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(5 rows)
+
+-- TID range scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Tid Range Scan on scan_table
+   TID Cond: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid))
+ Generated Plan Advice:
+   TID_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(5 rows)
+
+-- Try forcing each of our test queries to use the scan type they
+-- wanted to use anyway. This should succeed.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+              QUERY PLAN              
+--------------------------------------
+ Seq Scan on scan_table
+ Supplied Plan Advice:
+   SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(6 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE b > 'some text 8';
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Bitmap Heap Scan on scan_table
+   Recheck Cond: (b > 'some text 8'::text)
+   ->  Bitmap Index Scan on scan_table_b
+         Index Cond: (b > 'some text 8'::text)
+ Supplied Plan Advice:
+   BITMAP_HEAP_SCAN(scan_table scan_table_b) /* matched */
+ Generated Plan Advice:
+   BITMAP_HEAP_SCAN(scan_table public.scan_table_b)
+   NO_GATHER(scan_table)
+(9 rows)
+
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+              QUERY PLAN              
+--------------------------------------
+ Tid Scan on scan_table
+   TID Cond: (ctid = '(0,1)'::tid)
+ Supplied Plan Advice:
+   TID_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+   TID_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Tid Range Scan on scan_table
+   TID Cond: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid))
+ Supplied Plan Advice:
+   TID_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+   TID_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+COMMIT;
+-- Try to force a full scan of the table to use some other scan type. All
+-- of these will fail. An index scan or bitmap heap scan could potentially
+-- generate the correct answer, but the planner does not even consider these
+-- possibilities due to the lack of a WHERE clause.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Seq Scan on scan_table
+   Disabled: true
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_pkey) /* matched, failed */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Seq Scan on scan_table
+   Disabled: true
+ Supplied Plan Advice:
+   INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Seq Scan on scan_table
+   Disabled: true
+ Supplied Plan Advice:
+   BITMAP_HEAP_SCAN(scan_table scan_table_pkey) /* matched, failed */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+                  QUERY PLAN                  
+----------------------------------------------
+ Seq Scan on scan_table
+   Disabled: true
+ Supplied Plan Advice:
+   TID_SCAN(scan_table) /* matched, failed */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+COMMIT;
+-- Try again to force index use. This should now succeed for the INDEX_SCAN
+-- and BITMAP_HEAP_SCAN, but the INDEX_ONLY_SCAN can't be forced because the
+-- query fetches columns not included in the index.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a > 0)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Seq Scan on scan_table
+   Disabled: true
+   Filter: (a > 0)
+ Supplied Plan Advice:
+   INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(8 rows)
+
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Bitmap Heap Scan on scan_table
+   Recheck Cond: (a > 0)
+   ->  Bitmap Index Scan on scan_table_pkey
+         Index Cond: (a > 0)
+ Supplied Plan Advice:
+   BITMAP_HEAP_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   BITMAP_HEAP_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(9 rows)
+
+COMMIT;
+-- We can force a primary key lookup to use a sequential scan, but we
+-- can't force it to use an index-only scan (due to the column list)
+-- or a TID scan (due to the absence of a TID qual).
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+              QUERY PLAN              
+--------------------------------------
+ Seq Scan on scan_table
+   Filter: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Disabled: true
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(8 rows)
+
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Disabled: true
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   TID_SCAN(scan_table) /* matched, failed */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(8 rows)
+
+COMMIT;
+-- We can forcibly downgrade an index-only scan to an index scan, but we can't
+-- force the use of an index that the planner thinks is inapplicable.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Seq Scan on scan_table
+   Disabled: true
+   Filter: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_b) /* matched, failed */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(8 rows)
+
+COMMIT;
+-- We can force the use of a sequential scan in place of a bitmap heap scan,
+-- but a plain index scan on a BRIN index is not possible.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE b > 'some text 8';
+              QUERY PLAN              
+--------------------------------------
+ Seq Scan on scan_table
+   Filter: (b > 'some text 8'::text)
+ Supplied Plan Advice:
+   SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Seq Scan on scan_table
+   Disabled: true
+   Filter: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_b) /* matched, failed */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(8 rows)
+
+COMMIT;
+-- We can force the use of a sequential scan rather than a TID scan or
+-- TID range scan.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+              QUERY PLAN              
+--------------------------------------
+ Seq Scan on scan_table
+   Filter: (ctid = '(0,1)'::tid)
+ Supplied Plan Advice:
+   SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Seq Scan on scan_table
+   Filter: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid))
+ Supplied Plan Advice:
+   SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+COMMIT;
+-- Test more complex scenarios with index scans.
+BEGIN;
+-- Should still work if we mention the schema.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+-- But not if we mention the wrong schema.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table cilbup.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                                 QUERY PLAN                                  
+-----------------------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table cilbup.scan_table_pkey) /* matched, inapplicable */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+-- It's OK to repeat the same advice.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+   INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(8 rows)
+
+-- But it doesn't work if the index target is even notionally different.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                                 QUERY PLAN                                 
+----------------------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_pkey) /* matched, conflicting */
+   INDEX_SCAN(scan_table public.scan_table_pkey) /* matched, conflicting */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(8 rows)
+
+COMMIT;
+-- Test assorted incorrect advice.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(nothing)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+                      QUERY PLAN                      
+------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(nothing) /* not matched */
+ Generated Plan Advice:
+   INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(nothing whatsoever)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+                      QUERY PLAN                      
+------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(nothing whatsoever) /* not matched */
+ Generated Plan Advice:
+   INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table bogus)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table bogus) /* matched, inapplicable, failed */
+ Generated Plan Advice:
+   INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(nothing whatsoever)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_ONLY_SCAN(nothing whatsoever) /* not matched */
+ Generated Plan Advice:
+   INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table bogus)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+                           QUERY PLAN                            
+-----------------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_ONLY_SCAN(scan_table bogus) /* matched, inapplicable */
+ Generated Plan Advice:
+   INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+COMMIT;
+-- Test our ability to refer to multiple instances of the same alias.
+BEGIN;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Nested Loop Left Join
+   ->  Nested Loop Left Join
+         ->  Function Scan on generate_series g
+         ->  Index Scan using scan_table_pkey on scan_table s
+               Index Cond: (a = g.g)
+   ->  Index Scan using scan_table_pkey on scan_table s_1
+         Index Cond: (a = g.g)
+ Generated Plan Advice:
+   JOIN_ORDER(g s s#2)
+   NESTED_LOOP_PLAIN(s s#2)
+   INDEX_SCAN(s public.scan_table_pkey s#2 public.scan_table_pkey)
+   NO_GATHER(g s s#2)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Nested Loop Left Join
+   ->  Hash Left Join
+         Hash Cond: (g.g = s.a)
+         ->  Function Scan on generate_series g
+         ->  Hash
+               ->  Seq Scan on scan_table s
+   ->  Index Scan using scan_table_pkey on scan_table s_1
+         Index Cond: (a = g.g)
+ Supplied Plan Advice:
+   SEQ_SCAN(s) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(g s s#2)
+   NESTED_LOOP_PLAIN(s#2)
+   HASH_JOIN(s)
+   SEQ_SCAN(s)
+   INDEX_SCAN(s#2 public.scan_table_pkey)
+   NO_GATHER(g s s#2)
+(17 rows)
+
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s#2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Hash Left Join
+   Hash Cond: (g.g = s_1.a)
+   ->  Nested Loop Left Join
+         ->  Function Scan on generate_series g
+         ->  Index Scan using scan_table_pkey on scan_table s
+               Index Cond: (a = g.g)
+   ->  Hash
+         ->  Seq Scan on scan_table s_1
+ Supplied Plan Advice:
+   SEQ_SCAN(s#2) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(g s s#2)
+   NESTED_LOOP_PLAIN(s)
+   HASH_JOIN(s#2)
+   SEQ_SCAN(s#2)
+   INDEX_SCAN(s public.scan_table_pkey)
+   NO_GATHER(g s s#2)
+(17 rows)
+
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s) SEQ_SCAN(s#2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+                   QUERY PLAN                   
+------------------------------------------------
+ Hash Left Join
+   Hash Cond: (g.g = s_1.a)
+   ->  Hash Left Join
+         Hash Cond: (g.g = s.a)
+         ->  Function Scan on generate_series g
+         ->  Hash
+               ->  Seq Scan on scan_table s
+   ->  Hash
+         ->  Seq Scan on scan_table s_1
+ Supplied Plan Advice:
+   SEQ_SCAN(s) /* matched */
+   SEQ_SCAN(s#2) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(g s s#2)
+   HASH_JOIN(s s#2)
+   SEQ_SCAN(s s#2)
+   NO_GATHER(g s s#2)
+(17 rows)
+
+COMMIT;
+-- Test our ability to refer to scans within a subquery.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+   Index Cond: (a = 1)
+ Generated Plan Advice:
+   INDEX_SCAN(s@x public.scan_table_pkey)
+   NO_GATHER(x s@x)
+(5 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+   Index Cond: (a = 1)
+ Generated Plan Advice:
+   INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey)
+   NO_GATHER(unnamed_subquery s@unnamed_subquery)
+(5 rows)
+
+BEGIN;
+-- Should not match.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(s) /* not matched */
+ Generated Plan Advice:
+   INDEX_SCAN(s@x public.scan_table_pkey)
+   NO_GATHER(x s@x)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(s) /* not matched */
+ Generated Plan Advice:
+   INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey)
+   NO_GATHER(unnamed_subquery s@unnamed_subquery)
+(7 rows)
+
+-- Should match first query only.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@x)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+          QUERY PLAN           
+-------------------------------
+ Seq Scan on scan_table s
+   Filter: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(s@x) /* matched */
+ Generated Plan Advice:
+   SEQ_SCAN(s@x)
+   NO_GATHER(x s@x)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(s@x) /* not matched */
+ Generated Plan Advice:
+   INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey)
+   NO_GATHER(unnamed_subquery s@unnamed_subquery)
+(7 rows)
+
+-- Should match second query only.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@unnamed_subquery)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(s@unnamed_subquery) /* not matched */
+ Generated Plan Advice:
+   INDEX_SCAN(s@x public.scan_table_pkey)
+   NO_GATHER(x s@x)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+                    QUERY PLAN                    
+--------------------------------------------------
+ Seq Scan on scan_table s
+   Filter: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(s@unnamed_subquery) /* matched */
+ Generated Plan Advice:
+   SEQ_SCAN(s@unnamed_subquery)
+   NO_GATHER(unnamed_subquery s@unnamed_subquery)
+(7 rows)
+
+COMMIT;
diff --git a/contrib/pg_plan_advice/expected/syntax.out b/contrib/pg_plan_advice/expected/syntax.out

new file mode 100644 (file)

index 0000000..dddb12c
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/syntax.out
@@ -0,0 +1,59 @@
+LOAD 'pg_plan_advice';
+-- An empty string is allowed, and so is an empty target list.
+SET pg_plan_advice.advice = '';
+SET pg_plan_advice.advice = 'SEQ_SCAN()';
+-- Test assorted variations in capitalization, whitespace, and which parts of
+-- the relation identifier are included. These should all work.
+SET pg_plan_advice.advice = 'SEQ_SCAN(x)';
+SET pg_plan_advice.advice = 'seq_scan(x@y)';
+SET pg_plan_advice.advice = 'SEQ_scan(x#2)';
+SET pg_plan_advice.advice = 'SEQ_SCAN (x/y)';
+SET pg_plan_advice.advice = '  SEQ_SCAN ( x / y . z )  ';
+SET pg_plan_advice.advice = 'SEQ_SCAN("x"#2/"y"."z"@"t")';
+-- Syntax errors.
+SET pg_plan_advice.advice = 'SEQUENTIAL_SCAN(x)';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "SEQUENTIAL_SCAN(x)"
+DETAIL:  Could not parse advice: syntax error at or near "SEQUENTIAL_SCAN"
+SET pg_plan_advice.advice = 'SEQ_SCAN';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN"
+DETAIL:  Could not parse advice: syntax error at end of input
+SET pg_plan_advice.advice = 'SEQ_SCAN(';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN("
+DETAIL:  Could not parse advice: syntax error at end of input
+SET pg_plan_advice.advice = 'SEQ_SCAN("';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN(""
+DETAIL:  Could not parse advice: unterminated quoted identifier at end of input
+SET pg_plan_advice.advice = 'SEQ_SCAN(#';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN(#"
+DETAIL:  Could not parse advice: syntax error at or near "#"
+SET pg_plan_advice.advice = '()';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "()"
+DETAIL:  Could not parse advice: syntax error at or near "("
+SET pg_plan_advice.advice = '123';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "123"
+DETAIL:  Could not parse advice: syntax error at or near "123"
+-- Legal comments.
+SET pg_plan_advice.advice = '/**/';
+SET pg_plan_advice.advice = 'HASH_JOIN(_)/***/';
+SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(/*x*/y)';
+SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(y//*x*/z)';
+-- Unterminated comments.
+SET pg_plan_advice.advice = '/*';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "/*"
+DETAIL:  Could not parse advice: unterminated comment at end of input
+SET pg_plan_advice.advice = 'JOIN_ORDER("fOO") /* oops';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "JOIN_ORDER("fOO") /* oops"
+DETAIL:  Could not parse advice: unterminated comment at end of input
+-- Nested comments are not supported, so the first of these is legal and
+-- the second is not.
+SET pg_plan_advice.advice = '/*/*/';
+SET pg_plan_advice.advice = '/*/* stuff */*/';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "/*/* stuff */*/"
+DETAIL:  Could not parse advice: syntax error at or near "*"
+-- Foreign join requires multiple relation identifiers.
+SET pg_plan_advice.advice = 'FOREIGN_JOIN(a)';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "FOREIGN_JOIN(a)"
+DETAIL:  Could not parse advice: FOREIGN_JOIN targets must contain more than one relation identifier at or near ")"
+SET pg_plan_advice.advice = 'FOREIGN_JOIN((a))';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "FOREIGN_JOIN((a))"
+DETAIL:  Could not parse advice: FOREIGN_JOIN targets must contain more than one relation identifier at or near ")"
diff --git a/contrib/pg_plan_advice/meson.build b/contrib/pg_plan_advice/meson.build

new file mode 100644 (file)

index 0000000..3452e5a
--- /dev/null
+++ b/contrib/pg_plan_advice/meson.build
@@ -0,0 +1,70 @@
+# Copyright (c) 2022-2024, PostgreSQL Global Development Group
+
+pg_plan_advice_sources = files(
+  'pg_plan_advice.c',
+  'pgpa_ast.c',
+  'pgpa_collector.c',
+  'pgpa_identifier.c',
+  'pgpa_join.c',
+  'pgpa_output.c',
+  'pgpa_planner.c',
+  'pgpa_scan.c',
+  'pgpa_trove.c',
+  'pgpa_walker.c',
+)
+
+pgpa_scanner = custom_target('pgpa_scanner',
+  input: 'pgpa_scanner.l',
+  output: 'pgpa_scanner.c',
+  command: flex_cmd,
+)
+generated_sources += pgpa_scanner
+pg_plan_advice_sources += pgpa_scanner
+
+pgpa_parser = custom_target('pgpa_parser',
+  input: 'pgpa_parser.y',
+  kwargs: bison_kw,
+)
+generated_sources += pgpa_parser.to_list()
+pg_plan_advice_sources += pgpa_parser
+
+if host_system == 'windows'
+  pg_plan_advice_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+    '--NAME', 'pg_plan_advice',
+    '--FILEDESC', 'pg_plan_advice - help the planner get the right plan',])
+endif
+
+pg_plan_advice = shared_module('pg_plan_advice',
+  pg_plan_advice_sources,
+  include_directories: include_directories('.'),
+  kwargs: contrib_mod_args,
+)
+contrib_targets += pg_plan_advice
+
+install_data(
+  'pg_plan_advice--1.0.sql',
+  'pg_plan_advice.control',
+  kwargs: contrib_data_args,
+)
+
+tests += {
+  'name': 'pg_plan_advice',
+  'sd': meson.current_source_dir(),
+  'bd': meson.current_build_dir(),
+  'regress': {
+    'sql': [
+      'gather',
+      'join_order',
+      'join_strategy',
+      'local_collector',
+      'partitionwise',
+      'scan',
+      'syntax',
+    ],
+  },
+  'tap': {
+    'tests': [
+      't/001_regress.pl',
+    ],
+  },
+}
diff --git a/contrib/pg_plan_advice/pg_plan_advice--1.0.sql b/contrib/pg_plan_advice/pg_plan_advice--1.0.sql

new file mode 100644 (file)

index 0000000..29f4f22
--- /dev/null
+++ b/contrib/pg_plan_advice/pg_plan_advice--1.0.sql
@@ -0,0 +1,42 @@
+/* contrib/pg_plan_advice/pg_plan_advice--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pg_plan_advice" to load this file. \quit
+
+CREATE FUNCTION pg_clear_collected_local_advice()
+RETURNS void
+AS 'MODULE_PATHNAME', 'pg_clear_collected_local_advice'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION pg_clear_collected_shared_advice()
+RETURNS void
+AS 'MODULE_PATHNAME', 'pg_clear_collected_shared_advice'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION pg_get_collected_local_advice(
+       OUT id bigint,
+       OUT userid oid,
+       OUT dbid oid,
+       OUT queryid bigint,
+       OUT collection_time timestamptz,
+       OUT query text,
+       OUT advice text
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_collected_local_advice'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION pg_get_collected_shared_advice(
+       OUT id bigint,
+       OUT userid oid,
+       OUT dbid oid,
+       OUT queryid bigint,
+       OUT collection_time timestamptz,
+       OUT query text,
+       OUT advice text
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_collected_shared_advice'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION pg_clear_collected_shared_advice(), pg_get_collected_shared_advice() FROM PUBLIC;
diff --git a/contrib/pg_plan_advice/pg_plan_advice.c b/contrib/pg_plan_advice/pg_plan_advice.c

new file mode 100644 (file)

index 0000000..f32e8b7
--- /dev/null
+++ b/contrib/pg_plan_advice/pg_plan_advice.c
@@ -0,0 +1,454 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_plan_advice.c
+ *       main entrypoints for generating and applying planner advice
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pg_plan_advice.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pg_plan_advice.h"
+#include "pgpa_ast.h"
+#include "pgpa_collector.h"
+#include "pgpa_identifier.h"
+#include "pgpa_output.h"
+#include "pgpa_planner.h"
+#include "pgpa_trove.h"
+#include "pgpa_walker.h"
+
+#include "commands/defrem.h"
+#include "commands/explain.h"
+#include "commands/explain_format.h"
+#include "commands/explain_state.h"
+#include "funcapi.h"
+#include "optimizer/planner.h"
+#include "storage/dsm_registry.h"
+#include "utils/guc.h"
+
+PG_MODULE_MAGIC;
+
+static pgpa_shared_state *pgpa_state = NULL;
+static dsa_area *pgpa_dsa_area = NULL;
+
+/* GUC variables */
+char      *pg_plan_advice_advice = NULL;
+static bool pg_plan_advice_always_explain_supplied_advice = true;
+int                    pg_plan_advice_local_collection_limit = 0;
+int                    pg_plan_advice_shared_collection_limit = 0;
+
+/* Saved hook value */
+static explain_per_plan_hook_type prev_explain_per_plan = NULL;
+
+/* Other file-level globals */
+static int     es_extension_id;
+static MemoryContext pgpa_memory_context = NULL;
+
+static void pg_plan_advice_explain_option_handler(ExplainState *es,
+                                                                                                 DefElem *opt,
+                                                                                                 ParseState *pstate);
+static void pg_plan_advice_explain_per_plan_hook(PlannedStmt *plannedstmt,
+                                                                                                IntoClause *into,
+                                                                                                ExplainState *es,
+                                                                                                const char *queryString,
+                                                                                                ParamListInfo params,
+                                                                                                QueryEnvironment *queryEnv);
+static bool pg_plan_advice_advice_check_hook(char **newval, void **extra,
+                                                                                        GucSource source);
+static DefElem *find_defelem_by_defname(List *deflist, char *defname);
+
+/*
+ * Initialize this module.
+ */
+void
+_PG_init(void)
+{
+       DefineCustomStringVariable("pg_plan_advice.advice",
+                                                          "advice to apply during query planning",
+                                                          NULL,
+                                                          &pg_plan_advice_advice,
+                                                          NULL,
+                                                          PGC_USERSET,
+                                                          0,
+                                                          pg_plan_advice_advice_check_hook,
+                                                          NULL,
+                                                          NULL);
+
+       DefineCustomBoolVariable("pg_plan_advice.always_explain_supplied_advice",
+                                                        "EXPLAIN output includes supplied advice even without EXPLAIN (PLAN_ADVICE)",
+                                                        NULL,
+                                                        &pg_plan_advice_always_explain_supplied_advice,
+                                                        true,
+                                                        PGC_USERSET,
+                                                        0,
+                                                        NULL,
+                                                        NULL,
+                                                        NULL);
+
+       DefineCustomIntVariable("pg_plan_advice.local_collection_limit",
+                                                       "# of advice entries to retain in per-backend memory",
+                                                       NULL,
+                                                       &pg_plan_advice_local_collection_limit,
+                                                       0,
+                                                       0, INT_MAX,
+                                                       PGC_USERSET,
+                                                       0,
+                                                       NULL,
+                                                       NULL,
+                                                       NULL);
+
+       DefineCustomIntVariable("pg_plan_advice.shared_collection_limit",
+                                                       "# of advice entries to retain in shared memory",
+                                                       NULL,
+                                                       &pg_plan_advice_shared_collection_limit,
+                                                       0,
+                                                       0, INT_MAX,
+                                                       PGC_SUSET,
+                                                       0,
+                                                       NULL,
+                                                       NULL,
+                                                       NULL);
+
+       MarkGUCPrefixReserved("pg_plan_advice");
+
+       /* Get an ID that we can use to cache data in an ExplainState. */
+       es_extension_id = GetExplainExtensionId("pg_plan_advice");
+
+       /* Register the new EXPLAIN options implemented by this module. */
+       RegisterExtensionExplainOption("plan_advice",
+                                                                  pg_plan_advice_explain_option_handler);
+
+       /* Install hooks */
+       pgpa_planner_install_hooks();
+       prev_explain_per_plan = explain_per_plan_hook;
+       explain_per_plan_hook = pg_plan_advice_explain_per_plan_hook;
+}
+
+/*
+ * Initialize shared state when first created.
+ */
+static void
+pgpa_init_shared_state(void *ptr)
+{
+       pgpa_shared_state *state = (pgpa_shared_state *) ptr;
+
+       LWLockInitialize(&state->lock, LWLockNewTrancheId("pg_plan_advice_lock"));
+       state->dsa_tranche = LWLockNewTrancheId("pg_plan_advice_dsa");
+       state->area = DSA_HANDLE_INVALID;
+       state->shared_collector = InvalidDsaPointer;
+}
+
+/*
+ * Return a pointer to a memory context where long-lived data managed by this
+ * module can be stored.
+ */
+MemoryContext
+pg_plan_advice_get_mcxt(void)
+{
+       if (pgpa_memory_context == NULL)
+               pgpa_memory_context = AllocSetContextCreate(TopMemoryContext,
+                                                                                                       "pg_plan_advice",
+                                                                                                       ALLOCSET_DEFAULT_SIZES);
+
+       return pgpa_memory_context;
+}
+
+/*
+ * Get a pointer to our shared state.
+ *
+ * If no shared state exists, create and initialize it. If it does exist but
+ * this backend has not yet accessed it, attach to it. Otherwise, just return
+ * our cached pointer.
+ *
+ * Along the way, make sure the relevant LWLock tranches are registered.
+ */
+pgpa_shared_state *
+pg_plan_advice_attach(void)
+{
+       if (pgpa_state == NULL)
+       {
+               bool            found;
+
+               pgpa_state =
+                       GetNamedDSMSegment("pg_plan_advice", sizeof(pgpa_shared_state),
+                                                          pgpa_init_shared_state, &found);
+       }
+
+       return pgpa_state;
+}
+
+/*
+ * Return a pointer to pg_plan_advice's DSA area, creating it if needed.
+ */
+dsa_area *
+pg_plan_advice_dsa_area(void)
+{
+       if (pgpa_dsa_area == NULL)
+       {
+               pgpa_shared_state *state = pg_plan_advice_attach();
+               dsa_handle      area_handle;
+               MemoryContext oldcontext;
+
+               oldcontext = MemoryContextSwitchTo(pg_plan_advice_get_mcxt());
+
+               LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+               area_handle = state->area;
+               if (area_handle == DSA_HANDLE_INVALID)
+               {
+                       pgpa_dsa_area = dsa_create(state->dsa_tranche);
+                       dsa_pin(pgpa_dsa_area);
+                       state->area = dsa_get_handle(pgpa_dsa_area);
+                       LWLockRelease(&state->lock);
+               }
+               else
+               {
+                       LWLockRelease(&state->lock);
+                       pgpa_dsa_area = dsa_attach(area_handle);
+               }
+
+               dsa_pin_mapping(pgpa_dsa_area);
+
+               MemoryContextSwitchTo(oldcontext);
+       }
+
+       return pgpa_dsa_area;
+}
+
+/*
+ * Handler for EXPLAIN (PLAN_ADVICE).
+ */
+static void
+pg_plan_advice_explain_option_handler(ExplainState *es, DefElem *opt,
+                                                                         ParseState *pstate)
+{
+       bool       *plan_advice;
+
+       plan_advice = GetExplainExtensionState(es, es_extension_id);
+
+       if (plan_advice == NULL)
+       {
+               plan_advice = palloc0_object(bool);
+               SetExplainExtensionState(es, es_extension_id, plan_advice);
+       }
+
+       *plan_advice = defGetBoolean(opt);
+}
+
+/*
+ * Display a possibly multi-line string in EXPLAIN output. In text format, the
+ * label gets its own line and each line of the value is indented below it.
+ */
+static void
+pg_plan_advice_explain_text_multiline(ExplainState *es, char *qlabel,
+                                                                         char *value)
+{
+       char       *s;
+
+       /* For non-text formats, it's best not to add any special handling. */
+       if (es->format != EXPLAIN_FORMAT_TEXT)
+       {
+               ExplainPropertyText(qlabel, value, es);
+               return;
+       }
+
+       /* In text format, an empty value means there is nothing to display. */
+       if (*value == '\0')
+               return;
+
+       /*
+        * It looks nicest to indent each line of the advice separately, beginning
+        * on the line below the label.
+        */
+       ExplainIndentText(es);
+       appendStringInfo(es->str, "%s:\n", qlabel);
+       es->indent++;
+       while ((s = strchr(value, '\n')) != NULL)
+       {
+               ExplainIndentText(es);
+               appendBinaryStringInfo(es->str, value, (s - value) + 1);
+               value = s + 1;
+       }
+
+       /* Don't interpret a terminal newline as a request for an empty line. */
+       if (*value != '\0')
+       {
+               ExplainIndentText(es);
+               appendStringInfo(es->str, "%s\n", value);
+       }
+
+       es->indent--;
+}
+
+/*
+ * Emit one line per supplied advice item, annotated with its match status.
+ */
+static void
+pg_plan_advice_explain_feedback(ExplainState *es, List *feedback)
+{
+       StringInfoData buf;
+
+       initStringInfo(&buf);
+       foreach_node(DefElem, item, feedback)
+       {
+               int                     flags = defGetInt32(item);
+
+               appendStringInfo(&buf, "%s /* ", item->defname);
+               if ((flags & PGPA_TE_MATCH_FULL) != 0)
+               {
+                       Assert((flags & PGPA_TE_MATCH_PARTIAL) != 0);
+                       appendStringInfoString(&buf, "matched");
+               }
+               else if ((flags & PGPA_TE_MATCH_PARTIAL) != 0)
+                       appendStringInfoString(&buf, "partially matched");
+               else
+                       appendStringInfoString(&buf, "not matched");
+               if ((flags & PGPA_TE_INAPPLICABLE) != 0)
+                       appendStringInfoString(&buf, ", inapplicable");
+               if ((flags & PGPA_TE_CONFLICTING) != 0)
+                       appendStringInfoString(&buf, ", conflicting");
+               if ((flags & PGPA_TE_FAILED) != 0)
+                       appendStringInfoString(&buf, ", failed");
+               appendStringInfoString(&buf, " */\n");
+       }
+
+       pg_plan_advice_explain_text_multiline(es, "Supplied Plan Advice",
+                                                                                 buf.data);
+}
+
+/*
+ * Add relevant details, if any, to the EXPLAIN output for a single plan.
+ */
+static void
+pg_plan_advice_explain_per_plan_hook(PlannedStmt *plannedstmt,
+                                                                        IntoClause *into,
+                                                                        ExplainState *es,
+                                                                        const char *queryString,
+                                                                        ParamListInfo params,
+                                                                        QueryEnvironment *queryEnv)
+{
+       bool       *plan_advice = GetExplainExtensionState(es, es_extension_id);
+       DefElem    *pgpa_item;
+       List       *pgpa_list;
+
+       if (prev_explain_per_plan)
+               prev_explain_per_plan(plannedstmt, into, es, queryString, params,
+                                                         queryEnv);
+
+       /* Find any data pgpa_planner_shutdown stashed in the PlannedStmt. */
+       pgpa_item = find_defelem_by_defname(plannedstmt->extension_state,
+                                                                               "pg_plan_advice");
+       pgpa_list = pgpa_item == NULL ? NULL : (List *) pgpa_item->arg;
+
+       /*
+        * By default, if there is a record of attempting to apply advice during
+        * query planning, we always output that information, but the user can set
+        * pg_plan_advice.always_explain_supplied_advice = false to suppress that
+        * behavior. If they do, we'll only display it when the PLAN_ADVICE option
+        * was specified and not set to false.
+        *
+        * NB: If we're explaining a query planned beforehand -- i.e. a prepared
+        * statement -- the application of query advice may not have been
+        * recorded, and therefore this won't be able to show anything.
+        */
+       if (pgpa_list != NULL && (pg_plan_advice_always_explain_supplied_advice ||
+                                                         (plan_advice != NULL && *plan_advice)))
+       {
+               DefElem    *feedback;
+
+               feedback = find_defelem_by_defname(pgpa_list, "feedback");
+               if (feedback != NULL)
+                       pg_plan_advice_explain_feedback(es, (List *) feedback->arg);
+       }
+
+       /*
+        * If the PLAN_ADVICE option was specified -- and not set to FALSE --
+        * show generated advice.
+        */
+       if (plan_advice != NULL && *plan_advice)
+       {
+               DefElem    *advice_string_item;
+               char       *advice_string;
+
+               advice_string_item =
+                       find_defelem_by_defname(pgpa_list, "advice_string");
+               if (advice_string_item != NULL)
+               {
+                       /* Advice has already been generated; we can reuse it. */
+                       advice_string = strVal(advice_string_item->arg);
+               }
+               else
+               {
+                       pgpa_plan_walker_context walker;
+                       StringInfoData buf;
+                       pgpa_identifier *rt_identifiers;
+
+                       /* Advice not yet generated; do that now. */
+                       pgpa_plan_walker(&walker, plannedstmt);
+                       rt_identifiers =
+                               pgpa_create_identifiers_for_planned_stmt(plannedstmt);
+                       initStringInfo(&buf);
+                       pgpa_output_advice(&buf, &walker, rt_identifiers);
+                       advice_string = buf.data;
+               }
+
+               if (advice_string[0] != '\0')
+                       pg_plan_advice_explain_text_multiline(es, "Generated Plan Advice",
+                                                                                                 advice_string);
+       }
+}
+
+/*
+ * GUC check hook for pg_plan_advice.advice: accept only parseable advice.
+ */
+static bool
+pg_plan_advice_advice_check_hook(char **newval, void **extra, GucSource source)
+{
+       MemoryContext oldcontext;
+       MemoryContext tmpcontext;
+       char       *error;
+       bool            ok;
+
+       if (*newval == NULL)
+               return true;
+
+       tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
+                                                                          "pg_plan_advice.advice",
+                                                                          ALLOCSET_DEFAULT_SIZES);
+       oldcontext = MemoryContextSwitchTo(tmpcontext);
+
+       /*
+        * It would be nice to save the parse tree that we construct here for
+        * eventual use when planning with this advice, but *extra can only point
+        * to a single guc_malloc'd chunk, and our parse tree involves an
+        * arbitrary number of memory allocations.
+        */
+       (void) pgpa_parse(*newval, &error);
+
+       /* Report any failure before discarding tmpcontext and its contents. */
+       ok = (error == NULL);
+       if (!ok)
+               GUC_check_errdetail("Could not parse advice: %s", error);
+
+       MemoryContextSwitchTo(oldcontext);
+       MemoryContextDelete(tmpcontext);
+
+       return ok;
+}
+
+/*
+ * Search a list of DefElem objects for a given defname.
+ */
+static DefElem *
+find_defelem_by_defname(List *deflist, char *defname)
+{
+       foreach_node(DefElem, item, deflist)
+       {
+               if (strcmp(item->defname, defname) == 0)
+                       return item;
+       }
+
+       return NULL;
+}
diff --git a/contrib/pg_plan_advice/pg_plan_advice.control b/contrib/pg_plan_advice/pg_plan_advice.control

new file mode 100644 (file)

index 0000000..aa6fdc9
--- /dev/null
+++ b/contrib/pg_plan_advice/pg_plan_advice.control
@@ -0,0 +1,5 @@
+# pg_plan_advice extension
+comment = 'help the planner get the right plan'
+default_version = '1.0'
+module_pathname = '$libdir/pg_plan_advice'
+relocatable = true
diff --git a/contrib/pg_plan_advice/pg_plan_advice.h b/contrib/pg_plan_advice/pg_plan_advice.h

new file mode 100644 (file)

index 0000000..86efb3b
--- /dev/null
+++ b/contrib/pg_plan_advice/pg_plan_advice.h
@@ -0,0 +1,37 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_plan_advice.h
+ *       main header file for pg_plan_advice contrib module
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pg_plan_advice.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_PLAN_ADVICE_H
+#define PG_PLAN_ADVICE_H
+
+#include "nodes/plannodes.h"
+#include "storage/lwlock.h"
+#include "utils/dsa.h"
+
+typedef struct pgpa_shared_state
+{
+       LWLock          lock;
+       int                     dsa_tranche;
+       dsa_handle      area;
+       dsa_pointer shared_collector;
+} pgpa_shared_state;
+
+/* GUC variables */
+extern int     pg_plan_advice_local_collection_limit;
+extern int     pg_plan_advice_shared_collection_limit;
+extern char *pg_plan_advice_advice;
+
+/* Function prototypes */
+extern MemoryContext pg_plan_advice_get_mcxt(void);
+extern pgpa_shared_state *pg_plan_advice_attach(void);
+extern dsa_area *pg_plan_advice_dsa_area(void);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_ast.c b/contrib/pg_plan_advice/pgpa_ast.c

new file mode 100644 (file)

index 0000000..02ffbfa
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_ast.c
@@ -0,0 +1,392 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_ast.c
+ *       additional supporting code related to plan advice parsing
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_ast.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pgpa_ast.h"
+
+#include "funcapi.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+
+static bool pgpa_identifiers_cover_target(int nrids, pgpa_identifier *rids,
+                                                                                 pgpa_advice_target *target,
+                                                                                 bool *rids_used);
+
+/*
+ * Get a C string that corresponds to the specified advice tag.
+ */
+char *
+pgpa_cstring_advice_tag(pgpa_advice_tag_type advice_tag)
+{
+       switch (advice_tag)
+       {
+               case PGPA_TAG_BITMAP_HEAP_SCAN:
+                       return "BITMAP_HEAP_SCAN";
+               case PGPA_TAG_FOREIGN_JOIN:
+                       return "FOREIGN_JOIN";
+               case PGPA_TAG_GATHER:
+                       return "GATHER";
+               case PGPA_TAG_GATHER_MERGE:
+                       return "GATHER_MERGE";
+               case PGPA_TAG_HASH_JOIN:
+                       return "HASH_JOIN";
+               case PGPA_TAG_INDEX_ONLY_SCAN:
+                       return "INDEX_ONLY_SCAN";
+               case PGPA_TAG_INDEX_SCAN:
+                       return "INDEX_SCAN";
+               case PGPA_TAG_JOIN_ORDER:
+                       return "JOIN_ORDER";
+               case PGPA_TAG_MERGE_JOIN_MATERIALIZE:
+                       return "MERGE_JOIN_MATERIALIZE";
+               case PGPA_TAG_MERGE_JOIN_PLAIN:
+                       return "MERGE_JOIN_PLAIN";
+               case PGPA_TAG_NESTED_LOOP_MATERIALIZE:
+                       return "NESTED_LOOP_MATERIALIZE";
+               case PGPA_TAG_NESTED_LOOP_MEMOIZE:
+                       return "NESTED_LOOP_MEMOIZE";
+               case PGPA_TAG_NESTED_LOOP_PLAIN:
+                       return "NESTED_LOOP_PLAIN";
+               case PGPA_TAG_NO_GATHER:
+                       return "NO_GATHER";
+               case PGPA_TAG_PARTITIONWISE:
+                       return "PARTITIONWISE";
+               case PGPA_TAG_SEMIJOIN_NON_UNIQUE:
+                       return "SEMIJOIN_NON_UNIQUE";
+               case PGPA_TAG_SEMIJOIN_UNIQUE:
+                       return "SEMIJOIN_UNIQUE";
+               case PGPA_TAG_SEQ_SCAN:
+                       return "SEQ_SCAN";
+               case PGPA_TAG_TID_SCAN:
+                       return "TID_SCAN";
+       }
+
+       pg_unreachable();
+       return NULL;
+}
+
+/*
+ * Convert an advice tag, formatted as a string that has already been
+ * downcased as appropriate, to a pgpa_advice_tag_type.
+ *
+ * If we succeed, set *fail = false and return the result; if we fail,
+ * set *fail = true and return an arbitrary value.
+ */
+pgpa_advice_tag_type
+pgpa_parse_advice_tag(const char *tag, bool *fail)
+{
+       *fail = false;
+
+       switch (tag[0])
+       {
+               case 'b':
+                       if (strcmp(tag, "bitmap_heap_scan") == 0)
+                               return PGPA_TAG_BITMAP_HEAP_SCAN;
+                       break;
+               case 'f':
+                       if (strcmp(tag, "foreign_join") == 0)
+                               return PGPA_TAG_FOREIGN_JOIN;
+                       break;
+               case 'g':
+                       if (strcmp(tag, "gather") == 0)
+                               return PGPA_TAG_GATHER;
+                       if (strcmp(tag, "gather_merge") == 0)
+                               return PGPA_TAG_GATHER_MERGE;
+                       break;
+               case 'h':
+                       if (strcmp(tag, "hash_join") == 0)
+                               return PGPA_TAG_HASH_JOIN;
+                       break;
+               case 'i':
+                       if (strcmp(tag, "index_scan") == 0)
+                               return PGPA_TAG_INDEX_SCAN;
+                       if (strcmp(tag, "index_only_scan") == 0)
+                               return PGPA_TAG_INDEX_ONLY_SCAN;
+                       break;
+               case 'j':
+                       if (strcmp(tag, "join_order") == 0)
+                               return PGPA_TAG_JOIN_ORDER;
+                       break;
+               case 'm':
+                       if (strcmp(tag, "merge_join_materialize") == 0)
+                               return PGPA_TAG_MERGE_JOIN_MATERIALIZE;
+                       if (strcmp(tag, "merge_join_plain") == 0)
+                               return PGPA_TAG_MERGE_JOIN_PLAIN;
+                       break;
+               case 'n':
+                       if (strcmp(tag, "nested_loop_materialize") == 0)
+                               return PGPA_TAG_NESTED_LOOP_MATERIALIZE;
+                       if (strcmp(tag, "nested_loop_memoize") == 0)
+                               return PGPA_TAG_NESTED_LOOP_MEMOIZE;
+                       if (strcmp(tag, "nested_loop_plain") == 0)
+                               return PGPA_TAG_NESTED_LOOP_PLAIN;
+                       if (strcmp(tag, "no_gather") == 0)
+                               return PGPA_TAG_NO_GATHER;
+                       break;
+               case 'p':
+                       if (strcmp(tag, "partitionwise") == 0)
+                               return PGPA_TAG_PARTITIONWISE;
+                       break;
+               case 's':
+                       if (strcmp(tag, "semijoin_non_unique") == 0)
+                               return PGPA_TAG_SEMIJOIN_NON_UNIQUE;
+                       if (strcmp(tag, "semijoin_unique") == 0)
+                               return PGPA_TAG_SEMIJOIN_UNIQUE;
+                       if (strcmp(tag, "seq_scan") == 0)
+                               return PGPA_TAG_SEQ_SCAN;
+                       break;
+               case 't':
+                       if (strcmp(tag, "tid_scan") == 0)
+                               return PGPA_TAG_TID_SCAN;
+                       break;
+       }
+
+       /* didn't work out */
+       *fail = true;
+
+       /* return an arbitrary value to unwind the call stack */
+       return PGPA_TAG_SEQ_SCAN;
+}
+
+/*
+ * Format a pgpa_advice_target as a string and append result to a StringInfo.
+ */
+void
+pgpa_format_advice_target(StringInfo str, pgpa_advice_target *target)
+{
+       if (target->ttype != PGPA_TARGET_IDENTIFIER)
+       {
+               bool            first = true;
+               char       *delims;
+
+               if (target->ttype == PGPA_TARGET_UNORDERED_LIST)
+                       delims = "{}";
+               else
+                       delims = "()";
+
+               appendStringInfoChar(str, delims[0]);
+               foreach_ptr(pgpa_advice_target, child_target, target->children)
+               {
+                       if (first)
+                               first = false;
+                       else
+                               appendStringInfoChar(str, ' ');
+                       pgpa_format_advice_target(str, child_target);
+               }
+               appendStringInfoChar(str, delims[1]);
+       }
+       else
+       {
+               const char *rt_identifier;
+
+               rt_identifier = pgpa_identifier_string(&target->rid);
+               appendStringInfoString(str, rt_identifier);
+       }
+}
+
+/*
+ * Append the textual form of a pgpa_index_target to a StringInfo.
+ *
+ * A plain index is written as its quoted name, preceded by the quoted schema
+ * name and a dot if a schema was given. An AND or OR node is written as
+ * "&&(" or "||(", then its children separated by single spaces, then ")".
+ */
+void
+pgpa_format_index_target(StringInfo str, pgpa_index_target *itarget)
+{
+       const char *separator = "";
+
+       /* Leaf case: just the index name, schema-qualified if we have one. */
+       if (itarget->itype == PGPA_INDEX_NAME)
+       {
+               if (itarget->indnamespace != NULL)
+                       appendStringInfo(str, "%s.",
+                                                        quote_identifier(itarget->indnamespace));
+               appendStringInfoString(str, quote_identifier(itarget->indname));
+               return;
+       }
+
+       /* Interior case: operator prefix, then each child in list order. */
+       appendStringInfoString(str,
+                                                  itarget->itype == PGPA_INDEX_AND ? "&&(" : "||(");
+       foreach_ptr(pgpa_index_target, child_target, itarget->children)
+       {
+               /* The separator is empty before the first child only. */
+               appendStringInfoString(str, separator);
+               pgpa_format_index_target(str, child_target);
+               separator = " ";
+       }
+       appendStringInfoChar(str, ')');
+}
+
+/*
+ * Report whether two pgpa_index_target trees are exactly identical: same
+ * node types, same names, and the same children in the same order.
+ */
+bool
+pgpa_index_targets_equal(pgpa_index_target *i1, pgpa_index_target *i2)
+{
+       ListCell   *lc1;
+       ListCell   *lc2;
+
+       if (i1->itype != i2->itype)
+               return false;
+
+       if (i1->itype == PGPA_INDEX_NAME)
+       {
+               /* A missing schema is equal only to another missing schema. */
+               if (i1->indnamespace == NULL || i2->indnamespace == NULL)
+               {
+                       if (i1->indnamespace != i2->indnamespace)
+                               return false;
+               }
+               else if (strcmp(i1->indnamespace, i2->indnamespace) != 0)
+                       return false;
+
+               return strcmp(i1->indname, i2->indname) == 0;
+       }
+
+       /* AND/OR nodes: the child lists must agree position by position. */
+       if (list_length(i1->children) != list_length(i2->children))
+               return false;
+       forboth(lc1, i1->children, lc2, i2->children)
+       {
+               pgpa_index_target *c1 = lfirst(lc1);
+               pgpa_index_target *c2 = lfirst(lc2);
+
+               if (!pgpa_index_targets_equal(c1, c2))
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Check whether an identifier matches any part of an advice target.
+ *
+ * For a list target, all descendants are searched and a match anywhere
+ * counts. For an identifier target, the alias name and occurrence number
+ * must be equal; the partition schema is compared only when the target
+ * specifies one; and the partition name and plan name must either be equal
+ * or be NULL on both sides.
+ */
+bool
+pgpa_identifier_matches_target(pgpa_identifier *rid, pgpa_advice_target *target)
+{
+       /* For non-identifiers, check all descendants. */
+       if (target->ttype != PGPA_TARGET_IDENTIFIER)
+       {
+               foreach_ptr(pgpa_advice_target, child_target, target->children)
+               {
+                       if (pgpa_identifier_matches_target(rid, child_target))
+                               return true;
+               }
+               return false;
+       }
+
+       if (strcmp(rid->alias_name, target->rid.alias_name) != 0)
+               return false;
+       if (rid->occurrence != target->rid.occurrence)
+               return false;
+
+       /*
+        * The target may leave the schema NULL to match anything. The identifier
+        * is expected to specify a schema, but don't depend on that here: an
+        * identifier without a schema cannot match a schema-qualified target,
+        * and a NULL pointer must never be handed to strcmp().
+        */
+       if (target->rid.partnsp != NULL &&
+               (rid->partnsp == NULL ||
+                strcmp(rid->partnsp, target->rid.partnsp) != 0))
+               return false;
+
+       /*
+        * These fields can be NULL on either side, but NULL only matches another
+        * NULL.
+        */
+       if (!strings_equal_or_both_null(rid->partrel, target->rid.partrel))
+               return false;
+       if (!strings_equal_or_both_null(rid->plan_name, target->rid.plan_name))
+               return false;
+
+       return true;
+}
+
+/*
+ * Match identifiers to advice targets and return an enum value indicating
+ * the relationship between the set of keys and the set of targets.
+ *
+ * 'rids' is an array of 'nrids' identifiers (the keys); 'target' may be a
+ * single identifier or a list, in which case all of its descendants are
+ * considered.
+ *
+ * See the comments for pgpa_itm_type.
+ */
+pgpa_itm_type
+pgpa_identifiers_match_target(int nrids, pgpa_identifier *rids,
+                                                         pgpa_advice_target *target)
+{
+       bool            all_rids_used = true;
+       bool            any_rids_used = false;
+       bool            all_targets_used;
+       bool       *rids_used = palloc0_array(bool, nrids);
+
+       /* Learn whether every target was matched, and which identifiers did it. */
+       all_targets_used =
+               pgpa_identifiers_cover_target(nrids, rids, target, rids_used);
+
+       /* Summarize the per-identifier flags. */
+       for (int i = 0; i < nrids; ++i)
+       {
+               if (rids_used[i])
+                       any_rids_used = true;
+               else
+                       all_rids_used = false;
+       }
+
+       /*
+        * The flag array is pure scratch space; release it now rather than
+        * leaving one allocation behind in the caller's memory context for
+        * every call.
+        */
+       pfree(rids_used);
+
+       if (all_rids_used)
+       {
+               if (all_targets_used)
+                       return PGPA_ITM_EQUAL;
+               else
+                       return PGPA_ITM_KEYS_ARE_SUBSET;
+       }
+       else
+       {
+               if (all_targets_used)
+                       return PGPA_ITM_TARGETS_ARE_SUBSET;
+               else if (any_rids_used)
+                       return PGPA_ITM_INTERSECTING;
+               else
+                       return PGPA_ITM_DISJOINT;
+       }
+}
+
+/*
+ * Test whether every identifier target reachable from 'target' is matched by
+ * at least one of the supplied identifiers.
+ *
+ * As a side effect, rids_used[i] is set to true for each identifier that
+ * matches at least one target. Entries are never reset to false here.
+ */
+static bool
+pgpa_identifiers_cover_target(int nrids, pgpa_identifier *rids,
+                                                         pgpa_advice_target *target, bool *rids_used)
+{
+       bool            covered;
+
+       if (target->ttype == PGPA_TARGET_IDENTIFIER)
+       {
+               /* Leaf: covered if any identifier matches; flag every one that does. */
+               covered = false;
+               for (int n = 0; n < nrids; ++n)
+               {
+                       if (!pgpa_identifier_matches_target(&rids[n], target))
+                               continue;
+                       rids_used[n] = true;
+                       covered = true;
+               }
+               return covered;
+       }
+
+       /*
+        * List: covered only if every child is covered. We must not stop at the
+        * first uncovered child, because the recursive calls are also what
+        * record which identifiers were used.
+        */
+       covered = true;
+       foreach_ptr(pgpa_advice_target, child_target, target->children)
+       {
+               bool            child_covered;
+
+               child_covered = pgpa_identifiers_cover_target(nrids, rids,
+                                                                                                         child_target, rids_used);
+               if (!child_covered)
+                       covered = false;
+       }
+
+       return covered;
+}
diff --git a/contrib/pg_plan_advice/pgpa_ast.h b/contrib/pg_plan_advice/pgpa_ast.h

new file mode 100644 (file)

index 0000000..f6fe730
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_ast.h
@@ -0,0 +1,204 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_ast.h
+ *       abstract syntax trees for plan advice, plus parser/scanner support
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_ast.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_AST_H
+#define PGPA_AST_H
+
+#include "pgpa_identifier.h"
+
+#include "nodes/pg_list.h"
+
+/*
+ * Advice items generally take the form SOME_TAG(item [...]), where an item
+ * can take various forms. The simplest case is a relation identifier, but
+ * some tags allow sublists, and JOIN_ORDER() allows both ordered and unordered
+ * sublists.
+ */
+typedef enum
+{
+       PGPA_TARGET_IDENTIFIER,         /* a single relation identifier */
+       PGPA_TARGET_ORDERED_LIST,       /* ordered sublist, written (item ...) */
+       PGPA_TARGET_UNORDERED_LIST      /* unordered sublist, written {item ...} */
+} pgpa_target_type;
+
+/*
+ * When an advice item describes a bitmap index scan, it may need to describe
+ * the use of multiple indexes.
+ */
+typedef enum
+{
+       PGPA_INDEX_NAME,                        /* one index: optional schema + name */
+       PGPA_INDEX_AND,                         /* list of children, written &&(item ...) */
+       PGPA_INDEX_OR                           /* list of children, written ||(item ...) */
+} pgpa_index_type;
+
+/*
+ * An index specification. We use this for INDEX_SCAN, INDEX_ONLY_SCAN,
+ * and BITMAP_HEAP_SCAN advice, but in the former two cases, the target must
+ * be of type PGPA_INDEX_NAME.
+ */
+typedef struct pgpa_index_target
+{
+       pgpa_index_type itype;
+
+       /*
+        * Index schema and name, when itype == PGPA_INDEX_NAME. The schema may be
+        * NULL, in which case only the index name is specified.
+        */
+       char       *indnamespace;
+       char       *indname;
+
+       /* List of pgpa_index_target objects, when itype != PGPA_INDEX_NAME */
+       List       *children;
+} pgpa_index_target;
+
+/*
+ * A single item about which advice is being given, which could be either
+ * a relation identifier that we want to break out into its constituent fields,
+ * or a sublist of some kind.
+ */
+typedef struct pgpa_advice_target
+{
+       pgpa_target_type ttype;
+
+       /*
+        * This field is meaningful when ttype is PGPA_TARGET_IDENTIFIER.
+        *
+        * All identifiers must have an alias name and an occurrence number; the
+        * remaining fields can be NULL. Note that it's possible to specify a
+        * partition name without a partition schema, but not the reverse.
+        */
+       pgpa_identifier rid;
+
+       /*
+        * This field is set when ttype is PGPA_TARGET_IDENTIFIER and the advice
+        * tag is PGPA_TAG_INDEX_SCAN, PGPA_TAG_INDEX_ONLY_SCAN, or
+        * PGPA_TAG_BITMAP_HEAP_SCAN.
+        */
+       pgpa_index_target *itarget;
+
+       /*
+        * When the ttype is PGPA_TARGET_<anything>_LIST, this field contains a
+        * list of additional pgpa_advice_target objects. Otherwise, it is unused.
+        */
+       List       *children;
+} pgpa_advice_target;
+
+/*
+ * These are all the kinds of advice that we know how to parse. If a keyword
+ * is found at the top level, it must be in this list.
+ *
+ * If you change anything here, also update pgpa_parse_advice_tag and
+ * pgpa_cstring_advice_tag.
+ */
+typedef enum pgpa_advice_tag_type
+{
+       PGPA_TAG_BITMAP_HEAP_SCAN,      /* targets carry a pgpa_index_target */
+       PGPA_TAG_FOREIGN_JOIN,
+       PGPA_TAG_GATHER,
+       PGPA_TAG_GATHER_MERGE,
+       PGPA_TAG_HASH_JOIN,
+       PGPA_TAG_INDEX_ONLY_SCAN,       /* targets carry a pgpa_index_target */
+       PGPA_TAG_INDEX_SCAN,            /* targets carry a pgpa_index_target */
+       PGPA_TAG_JOIN_ORDER,
+       PGPA_TAG_MERGE_JOIN_MATERIALIZE,
+       PGPA_TAG_MERGE_JOIN_PLAIN,
+       PGPA_TAG_NESTED_LOOP_MATERIALIZE,
+       PGPA_TAG_NESTED_LOOP_MEMOIZE,
+       PGPA_TAG_NESTED_LOOP_PLAIN,
+       PGPA_TAG_NO_GATHER,
+       PGPA_TAG_PARTITIONWISE,
+       PGPA_TAG_SEMIJOIN_NON_UNIQUE,
+       PGPA_TAG_SEMIJOIN_UNIQUE,
+       PGPA_TAG_SEQ_SCAN,
+       PGPA_TAG_TID_SCAN
+} pgpa_advice_tag_type;
+
+/*
+ * An item of advice, meaning a tag and the list of all targets to which
+ * it is being applied.
+ *
+ * "targets" is a list of pgpa_advice_target objects.
+ *
+ * The List returned from pgpa_yyparse is a list of pgpa_advice_item objects.
+ */
+typedef struct pgpa_advice_item
+{
+       pgpa_advice_tag_type tag;
+       List       *targets;
+} pgpa_advice_item;
+
+/*
+ * Result of comparing an array of pgpa_identifier objects to a
+ * pgpa_advice_target.
+ *
+ * PGPA_ITM_EQUAL means all targets are matched by some identifier, and
+ * all identifiers were matched to a target.
+ *
+ * PGPA_ITM_KEYS_ARE_SUBSET means that all identifiers matched to a target,
+ * but there were leftover targets. Generally, this means that the advice is
+ * looking to apply to all of the rels we have plus some additional ones that
+ * we don't have.
+ *
+ * PGPA_ITM_TARGETS_ARE_SUBSET means that all targets are matched by an
+ * identifier, but there were leftover identifiers. Generally, this means
+ * that the advice is looking to apply to some but not all of the rels we have.
+ *
+ * PGPA_ITM_INTERSECTING means that some identifiers and targets were matched,
+ * but neither all identifiers nor all targets could be matched to items in
+ * the other set.
+ *
+ * PGPA_ITM_DISJOINT means that no matches between identifiers and targets were
+ * found.
+ */
+typedef enum
+{
+       PGPA_ITM_EQUAL,
+       PGPA_ITM_KEYS_ARE_SUBSET,
+       PGPA_ITM_TARGETS_ARE_SUBSET,
+       PGPA_ITM_INTERSECTING,
+       PGPA_ITM_DISJOINT
+} pgpa_itm_type;
+
+/*
+ * For pgpa_scanner.l and pgpa_parser.y.
+ *
+ * NOTE(review): the YY_TYPEDEF_YY_SCANNER_T guard presumably exists so that
+ * this typedef and the one in the flex-generated scanner cannot collide --
+ * confirm against the generated code.
+ */
+union YYSTYPE;
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void *yyscan_t;
+#endif
+
+/* in pgpa_scanner.l */
+extern int     pgpa_yylex(union YYSTYPE *yylval_param, List **result,
+                                          char **parse_error_msg_p, yyscan_t yyscanner);
+extern void pgpa_yyerror(List **result, char **parse_error_msg_p,
+                                                yyscan_t yyscanner,
+                                                const char *message);
+extern void pgpa_scanner_init(const char *str, yyscan_t *yyscannerp);
+extern void pgpa_scanner_finish(yyscan_t yyscanner);
+
+/* in pgpa_parser.y */
+extern int     pgpa_yyparse(List **result, char **parse_error_msg_p,
+                                                yyscan_t yyscanner);
+extern List *pgpa_parse(const char *advice_string, char **error_p);
+
+/* in pgpa_ast.c */
+extern char *pgpa_cstring_advice_tag(pgpa_advice_tag_type advice_tag);
+extern bool pgpa_identifier_matches_target(pgpa_identifier *rid,
+                                                                                  pgpa_advice_target *target);
+extern pgpa_itm_type pgpa_identifiers_match_target(int nrids,
+                                                                                                  pgpa_identifier *rids,
+                                                                                                  pgpa_advice_target *target);
+extern bool pgpa_index_targets_equal(pgpa_index_target *i1,
+                                                                        pgpa_index_target *i2);
+extern pgpa_advice_tag_type pgpa_parse_advice_tag(const char *tag, bool *fail);
+extern void pgpa_format_advice_target(StringInfo str,
+                                                                         pgpa_advice_target *target);
+extern void pgpa_format_index_target(StringInfo str,
+                                                                        pgpa_index_target *itarget);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_collector.c b/contrib/pg_plan_advice/pgpa_collector.c

new file mode 100644 (file)

index 0000000..12085d9
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_collector.c
@@ -0,0 +1,637 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_collector.c
+ *       collect advice into backend-local or shared memory
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_collector.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pg_plan_advice.h"
+#include "pgpa_collector.h"
+
+#include "datatype/timestamp.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "nodes/pg_list.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/timestamp.h"
+
+PG_FUNCTION_INFO_V1(pg_clear_collected_local_advice);
+PG_FUNCTION_INFO_V1(pg_clear_collected_shared_advice);
+PG_FUNCTION_INFO_V1(pg_get_collected_local_advice);
+PG_FUNCTION_INFO_V1(pg_get_collected_shared_advice);
+
+/* Number of entries held by each local or shared advice chunk. */
+#define ADVICE_CHUNK_SIZE              1024
+/* Initial length of a collector's chunk array, and the step by which it grows. */
+#define ADVICE_CHUNK_ARRAY_SIZE        64
+
+/* NOTE(review): presumably the result column count of the pg_get_collected_*_advice functions -- confirm */
+#define        PG_GET_ADVICE_COLUMNS   7
+
+/*
+ * Advice extracted from one query plan, together with the query string
+ * and various other identifying details.
+ */
+typedef struct pgpa_collected_advice
+{
+       Oid                     userid;                 /* user OID */
+       Oid                     dbid;                   /* database OID */
+       uint64          queryid;                /* query identifier */
+       TimestampTz timestamp;          /* query timestamp */
+       int                     advice_offset;  /* start of advice in textual data */
+
+       /*
+        * The query string starting at offset 0, followed by the advice string
+        * starting at advice_offset; each includes its terminating NUL byte.
+        */
+       char            textual_data[FLEXIBLE_ARRAY_MEMBER];
+} pgpa_collected_advice;
+
+/*
+ * A bunch of pointers to pgpa_collected_advice objects, stored in
+ * backend-local memory.
+ */
+typedef struct pgpa_local_advice_chunk
+{
+       /* One slot per stored object; a slot that holds nothing is NULL. */
+       pgpa_collected_advice *entries[ADVICE_CHUNK_SIZE];
+} pgpa_local_advice_chunk;
+
+/*
+ * Information about all of the pgpa_collected_advice objects that we're
+ * storing in local memory.
+ *
+ * We assign consecutive IDs, starting from 0, to each pgpa_collected_advice
+ * object that we store. The actual storage is an array of chunks, which
+ * helps keep memcpy() overhead low when we start discarding older data.
+ */
+typedef struct pgpa_local_advice
+{
+       uint64          next_id;                /* ID the next stored object will get */
+       uint64          oldest_id;              /* ID of the oldest object still stored */
+       uint64          base_id;                /* ID that maps to entry 0 of chunks[0] */
+       int                     chunk_array_allocated_size; /* allocated length of chunks[] */
+       pgpa_local_advice_chunk **chunks;       /* chunk pointers; NULL if not yet
+                                                                                * allocated */
+} pgpa_local_advice;
+
+/*
+ * Just like pgpa_local_advice_chunk, but stored in a dynamic shared area,
+ * so we must use dsa_pointer instead of native pointers.
+ */
+typedef struct pgpa_shared_advice_chunk
+{
+       /* One slot per stored object; an empty slot is InvalidDsaPointer. */
+       dsa_pointer entries[ADVICE_CHUNK_SIZE];
+} pgpa_shared_advice_chunk;
+
+/*
+ * Just like pgpa_local_advice, but stored in a dynamic shared area, so
+ * we must use dsa_pointer instead of native pointers.
+ */
+typedef struct pgpa_shared_advice
+{
+       uint64          next_id;                /* ID the next stored object will get */
+       uint64          oldest_id;              /* ID of the oldest object still stored */
+       uint64          base_id;                /* ID that maps to entry 0 of chunk 0 */
+       int                     chunk_array_allocated_size; /* allocated length of the chunk
+                                                                                        * array; 0 if none allocated yet */
+       dsa_pointer chunks;                     /* array of dsa_pointer, one per chunk */
+} pgpa_shared_advice;
+
+/* Pointers to local and shared collectors */
+static pgpa_local_advice *local_collector = NULL;
+static pgpa_shared_advice *shared_collector = NULL;
+
+/* Static functions */
+static pgpa_collected_advice *pgpa_make_collected_advice(Oid userid,
+                                                                                                                Oid dbid,
+                                                                                                                uint64 queryId,
+                                                                                                                TimestampTz timestamp,
+                                                                                                                const char *query_string,
+                                                                                                                const char *advice_string,
+                                                                                                                dsa_area *area,
+                                                                                                                dsa_pointer *result);
+static void pgpa_store_local_advice(pgpa_collected_advice *ca);
+static void pgpa_trim_local_advice(int limit);
+static void pgpa_store_shared_advice(dsa_pointer ca_pointer);
+static void pgpa_trim_shared_advice(dsa_area *area, int limit);
+
+/* The query string is stored at the very start of the textual data. */
+static inline const char *
+query_string(pgpa_collected_advice *ca)
+{
+       return &ca->textual_data[0];
+}
+
+/* The advice string is stored advice_offset bytes into the textual data. */
+static inline const char *
+advice_string(pgpa_collected_advice *ca)
+{
+       return &ca->textual_data[ca->advice_offset];
+}
+
+/*
+ * Record the advice for one query in each collector that is enabled.
+ *
+ * A collector is enabled when its collection limit is greater than zero.
+ * Both may be enabled at once, in which case each gets its own copy, and
+ * both copies carry the same user, database and timestamp.
+ */
+void
+pgpa_collect_advice(uint64 queryId, const char *query_string,
+                                       const char *advice_string)
+{
+       TimestampTz now = GetCurrentTimestamp();
+       Oid                     dbid = MyDatabaseId;
+       Oid                     userid = GetUserId();
+
+       /* Backend-local copy: must be allocated in the module's own context. */
+       if (pg_plan_advice_local_collection_limit > 0)
+       {
+               MemoryContext saved_context;
+               pgpa_collected_advice *local_ca;
+
+               saved_context = MemoryContextSwitchTo(pg_plan_advice_get_mcxt());
+               local_ca = pgpa_make_collected_advice(userid, dbid, queryId, now,
+                                                                                         query_string, advice_string,
+                                                                                         NULL, NULL);
+               pgpa_store_local_advice(local_ca);
+               MemoryContextSwitchTo(saved_context);
+       }
+
+       /* Shared copy: allocated in the DSA area and passed on as a dsa_pointer. */
+       if (pg_plan_advice_shared_collection_limit > 0)
+       {
+               dsa_pointer shared_ca;
+               dsa_area   *shared_area = pg_plan_advice_dsa_area();
+
+               (void) pgpa_make_collected_advice(userid, dbid, queryId, now,
+                                                                                 query_string, advice_string,
+                                                                                 shared_area, &shared_ca);
+               pgpa_store_shared_advice(shared_ca);
+       }
+}
+
+/*
+ * Allocate and fill a new pgpa_collected_advice object.
+ *
+ * The object records the given user OID, database OID, query ID and
+ * timestamp, followed by copies of the query string and the advice string,
+ * each including its terminating NUL byte.
+ *
+ * If area != NULL, it is used to allocate the new object, and the resulting
+ * dsa_pointer is returned via *result.
+ *
+ * If area == NULL, the new object is allocated in the current memory context,
+ * and result is not examined or modified.
+ *
+ * In both cases the return value is the backend-local address of the object.
+ */
+static pgpa_collected_advice *
+pgpa_make_collected_advice(Oid userid, Oid dbid, uint64 queryId,
+                                                  TimestampTz timestamp,
+                                                  const char *query_string,
+                                                  const char *advice_string,
+                                                  dsa_area *area, dsa_pointer *result)
+{
+       size_t          query_string_length = strlen(query_string) + 1;
+       size_t          advice_string_length = strlen(advice_string) + 1;
+       size_t          total_length;
+       pgpa_collected_advice *ca;
+
+       /* Fixed-size header plus both strings, terminators included. */
+       total_length = offsetof(pgpa_collected_advice, textual_data)
+               + query_string_length + advice_string_length;
+
+       if (area == NULL)
+               ca = palloc(total_length);
+       else
+       {
+               *result = dsa_allocate(area, total_length);
+               ca = dsa_get_address(area, *result);
+       }
+
+       /*
+        * Use the values the caller passed in rather than looking them up again,
+        * so that the parameters are honored and so that several copies made for
+        * the same query are guaranteed to agree.
+        */
+       ca->userid = userid;
+       ca->dbid = dbid;
+       ca->queryid = queryId;
+       ca->timestamp = timestamp;
+       ca->advice_offset = query_string_length;
+
+       /* The advice string goes immediately after the query string's NUL. */
+       memcpy(ca->textual_data, query_string, query_string_length);
+       memcpy(&ca->textual_data[ca->advice_offset],
+                  advice_string, advice_string_length);
+
+       return ca;
+}
+
+/*
+ * Append a pgpa_collected_advice object to the backend-local collection.
+ *
+ * The caller must already have switched to the appropriate memory context,
+ * and 'ca' should have been allocated in that same context; everything
+ * allocated here is allocated in the current memory context.
+ */
+static void
+pgpa_store_local_advice(pgpa_collected_advice *ca)
+{
+       pgpa_local_advice *collector = local_collector;
+       pgpa_local_advice_chunk *chunk;
+       uint64          slot;
+       uint64          chunkno;
+       uint64          entryno;
+
+       /* First call in this backend: create an empty collector. */
+       if (collector == NULL)
+       {
+               collector = palloc0(sizeof(pgpa_local_advice));
+               collector->chunk_array_allocated_size = ADVICE_CHUNK_ARRAY_SIZE;
+               collector->chunks =
+                       palloc0_array(pgpa_local_advice_chunk *,
+                                                 collector->chunk_array_allocated_size);
+               local_collector = collector;
+       }
+
+       /* Translate the next ID into a position relative to the first chunk. */
+       slot = collector->next_id - collector->base_id;
+       chunkno = slot / ADVICE_CHUNK_SIZE;
+       entryno = slot % ADVICE_CHUNK_SIZE;
+
+       /* Grow the chunk array by one increment if the position is past its end. */
+       if (chunkno >= collector->chunk_array_allocated_size)
+       {
+               int                     old_size = collector->chunk_array_allocated_size;
+               int                     new_size = old_size + ADVICE_CHUNK_ARRAY_SIZE;
+
+               collector->chunks = repalloc0_array(collector->chunks,
+                                                                                       pgpa_local_advice_chunk *,
+                                                                                       old_size,
+                                                                                       new_size);
+               collector->chunk_array_allocated_size = new_size;
+       }
+
+       /* Create the chunk itself the first time it is needed. */
+       chunk = collector->chunks[chunkno];
+       if (chunk == NULL)
+       {
+               chunk = palloc0_object(pgpa_local_advice_chunk);
+               collector->chunks[chunkno] = chunk;
+       }
+
+       /* Fill the slot, which must be vacant, and advance the ID counter. */
+       Assert(chunk->entries[entryno] == NULL);
+       chunk->entries[entryno] = ca;
+       collector->next_id++;
+
+       /* If that put us over the storage limit, discard the oldest data. */
+       pgpa_trim_local_advice(pg_plan_advice_local_collection_limit);
+}
+
+/*
+ * Add a pgpa_collected_advice object to the shared advice collection.
+ *
+ * 'ca_pointer' should have been allocated from the pg_plan_advice DSA area
+ * and should point to an object of type pgpa_collected_advice.
+ *
+ * The shared state's lock is held in exclusive mode from before the
+ * collector is examined until after any trimming is done.
+ */
+static void
+pgpa_store_shared_advice(dsa_pointer ca_pointer)
+{
+       uint64          chunk_number;
+       uint64          chunk_offset;
+       pgpa_shared_state *state = pg_plan_advice_attach();
+       dsa_area   *area = pg_plan_advice_dsa_area();
+       pgpa_shared_advice *sa = shared_collector;
+       dsa_pointer *chunk_array;
+       pgpa_shared_advice_chunk *chunk;
+
+       /* Lock the shared state. */
+       LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+
+       /*
+        * If we're not attached to the shared advice collector yet, fix that now.
+        * If we're the first ones to attach, we may need to create the object.
+        */
+       if (sa == NULL)
+       {
+               if (state->shared_collector == InvalidDsaPointer)
+                       state->shared_collector =
+                               dsa_allocate0(area, sizeof(pgpa_shared_advice));
+               shared_collector = sa = dsa_get_address(area, state->shared_collector);
+       }
+
+       /*
+        * It's possible that some other backend may have succeeded in creating
+        * the main collector object but failed to allocate an initial chunk
+        * array, so we must be prepared to allocate the chunk array here whether
+        * or not we created the collector object.
+        *
+        * Note that 'sa' and 'shared_collector' are the same pointer by now.
+        */
+       if (shared_collector->chunk_array_allocated_size == 0)
+       {
+               sa->chunks =
+                       dsa_allocate0(area,
+                                                 sizeof(dsa_pointer) * ADVICE_CHUNK_ARRAY_SIZE);
+               sa->chunk_array_allocated_size = ADVICE_CHUNK_ARRAY_SIZE;
+       }
+
+       /* Compute chunk and offset at which to store this advice. */
+       chunk_number = (sa->next_id - sa->base_id) / ADVICE_CHUNK_SIZE;
+       chunk_offset = (sa->next_id - sa->base_id) % ADVICE_CHUNK_SIZE;
+
+       /* Get the address of the chunk array and, if needed, extend it. */
+       if (chunk_number >= sa->chunk_array_allocated_size)
+       {
+               int                     new_size;
+               dsa_pointer new_chunks;
+
+               /*
+                * DSA can't enlarge an existing allocation, so we must make a new
+                * allocation and copy data over.
+                */
+               new_size = sa->chunk_array_allocated_size + ADVICE_CHUNK_ARRAY_SIZE;
+               new_chunks = dsa_allocate0(area, sizeof(dsa_pointer) * new_size);
+               chunk_array = dsa_get_address(area, new_chunks);
+               memcpy(chunk_array, dsa_get_address(area, sa->chunks),
+                          sizeof(dsa_pointer) * sa->chunk_array_allocated_size);
+               dsa_free(area, sa->chunks);
+               sa->chunks = new_chunks;
+               sa->chunk_array_allocated_size = new_size;
+       }
+       else
+               chunk_array = dsa_get_address(area, sa->chunks);
+
+       /* Get the address of the desired chunk, allocating it if needed. */
+       if (chunk_array[chunk_number] == InvalidDsaPointer)
+               chunk_array[chunk_number] =
+                       dsa_allocate0(area, sizeof(pgpa_shared_advice_chunk));
+       chunk = dsa_get_address(area, chunk_array[chunk_number]);
+
+       /* Save pointer and bump next-id counter. */
+       Assert(chunk->entries[chunk_offset] == InvalidDsaPointer);
+       chunk->entries[chunk_offset] = ca_pointer;
+       ++sa->next_id;
+
+       /* If we've exceeded the storage limit, discard old data. */
+       pgpa_trim_shared_advice(area, pg_plan_advice_shared_collection_limit);
+
+       /* Release lock on shared state. */
+       LWLockRelease(&state->lock);
+}
+
+/*
+ * Discard collected advice stored in backend-local memory in excess of the
+ * specified limit.
+ *
+ * The oldest entries are discarded first. A limit of 0 discards everything.
+ * The local collector must already exist; it is dereferenced unconditionally.
+ */
+static void
+pgpa_trim_local_advice(int limit)
+{
+       pgpa_local_advice *la = local_collector;
+       uint64          current_count;
+       uint64          trim_count;
+       uint64          total_chunk_count;
+       uint64          trim_chunk_count;
+       uint64          remaining_chunk_count;
+
+       /* If we haven't yet reached the limit, there's nothing to do. */
+       current_count = la->next_id - la->oldest_id;
+       if (current_count <= limit)
+               return;
+
+       /* Free enough entries to get us back down to the limit. */
+       trim_count = current_count - limit;
+       while (trim_count > 0)
+       {
+               uint64          chunk_number;
+               uint64          chunk_offset;
+
+               chunk_number = (la->oldest_id - la->base_id) / ADVICE_CHUNK_SIZE;
+               chunk_offset = (la->oldest_id - la->base_id) % ADVICE_CHUNK_SIZE;
+
+               Assert(la->chunks[chunk_number]->entries[chunk_offset] != NULL);
+               pfree(la->chunks[chunk_number]->entries[chunk_offset]);
+               la->chunks[chunk_number]->entries[chunk_offset] = NULL;
+               ++la->oldest_id;
+               --trim_count;
+       }
+
+       /*
+        * Free any chunks that are now entirely unused. These are exactly the
+        * chunks that precede the one containing the new oldest ID.
+        */
+       trim_chunk_count = (la->oldest_id - la->base_id) / ADVICE_CHUNK_SIZE;
+       for (uint64 n = 0; n < trim_chunk_count; ++n)
+               pfree(la->chunks[n]);
+
+       /*
+        * Slide remaining chunk pointers back toward the base of the array.
+        * total_chunk_count is rounded up so that a partly filled final chunk
+        * is included.
+        */
+       total_chunk_count = (la->next_id - la->base_id +
+                                                ADVICE_CHUNK_SIZE - 1) / ADVICE_CHUNK_SIZE;
+       remaining_chunk_count = total_chunk_count - trim_chunk_count;
+       if (remaining_chunk_count > 0)
+               memmove(&la->chunks[0], &la->chunks[trim_chunk_count],
+                               sizeof(pgpa_local_advice_chunk *) * remaining_chunk_count);
+
+       /* Don't leave stale pointers around. */
+       memset(&la->chunks[remaining_chunk_count], 0,
+                  sizeof(pgpa_local_advice_chunk *)
+                  * (total_chunk_count - remaining_chunk_count));
+
+       /* Adjust base ID value accordingly. */
+       la->base_id += trim_chunk_count * ADVICE_CHUNK_SIZE;
+}
+
+/*
+ * Discard collected advice stored in shared memory in excess of the
+ * specified limit.
+ *
+ * The oldest entries are discarded first. A limit of 0 discards everything.
+ * 'area' is the DSA area in which the shared collector's data lives. This
+ * backend must already be attached to the shared collector, which is
+ * dereferenced unconditionally.
+ *
+ * NOTE(review): the caller visible here holds the shared state lock in
+ * exclusive mode while calling this; presumably every caller must -- confirm.
+ */
+static void
+pgpa_trim_shared_advice(dsa_area *area, int limit)
+{
+       pgpa_shared_advice *sa = shared_collector;
+       uint64          current_count;
+       uint64          trim_count;
+       uint64          total_chunk_count;
+       uint64          trim_chunk_count;
+       uint64          remaining_chunk_count;
+       dsa_pointer *chunk_array;
+
+       /* If we haven't yet reached the limit, there's nothing to do. */
+       current_count = sa->next_id - sa->oldest_id;
+       if (current_count <= limit)
+               return;
+
+       /* Get a pointer to the chunk array. */
+       chunk_array = dsa_get_address(area, sa->chunks);
+
+       /* Free enough entries to get us back down to the limit. */
+       trim_count = current_count - limit;
+       while (trim_count > 0)
+       {
+               uint64          chunk_number;
+               uint64          chunk_offset;
+               pgpa_shared_advice_chunk *chunk;
+
+               chunk_number = (sa->oldest_id - sa->base_id) / ADVICE_CHUNK_SIZE;
+               chunk_offset = (sa->oldest_id - sa->base_id) % ADVICE_CHUNK_SIZE;
+
+               chunk = dsa_get_address(area, chunk_array[chunk_number]);
+               Assert(chunk->entries[chunk_offset] != InvalidDsaPointer);
+               dsa_free(area, chunk->entries[chunk_offset]);
+               chunk->entries[chunk_offset] = InvalidDsaPointer;
+               ++sa->oldest_id;
+               --trim_count;
+       }
+
+       /*
+        * Free any chunks that are now entirely unused. These are exactly the
+        * chunks that precede the one containing the new oldest ID.
+        */
+       trim_chunk_count = (sa->oldest_id - sa->base_id) / ADVICE_CHUNK_SIZE;
+       for (uint64 n = 0; n < trim_chunk_count; ++n)
+               dsa_free(area, chunk_array[n]);
+
+       /*
+        * Slide remaining chunk pointers back toward the base of the array.
+        * total_chunk_count is rounded up so that a partly filled final chunk
+        * is included.
+        */
+       total_chunk_count = (sa->next_id - sa->base_id +
+                                                ADVICE_CHUNK_SIZE - 1) / ADVICE_CHUNK_SIZE;
+       remaining_chunk_count = total_chunk_count - trim_chunk_count;
+       if (remaining_chunk_count > 0)
+               memmove(&chunk_array[0], &chunk_array[trim_chunk_count],
+                               sizeof(dsa_pointer) * remaining_chunk_count);
+
+       /*
+        * Don't leave stale pointers around. The array elements are dsa_pointer
+        * values, not native pointers, and the two types need not be the same
+        * width, so the size must be computed from dsa_pointer.
+        */
+       memset(&chunk_array[remaining_chunk_count], 0,
+                  sizeof(dsa_pointer)
+                  * (total_chunk_count - remaining_chunk_count));
+
+       /* Adjust base ID value accordingly. */
+       sa->base_id += trim_chunk_count * ADVICE_CHUNK_SIZE;
+}
+
+/*
+ * SQL-callable function: throw away the advice this backend has collected
+ * in backend-local memory.
+ */
+Datum
+pg_clear_collected_local_advice(PG_FUNCTION_ARGS)
+{
+       /* Without a local collector there is nothing to discard. */
+       if (local_collector == NULL)
+               PG_RETURN_VOID();
+
+       /* Trim down to a limit of zero retained entries. */
+       pgpa_trim_local_advice(0);
+
+       PG_RETURN_VOID();
+}
+
+/*
+ * SQL-callable function to discard advice collected in shared memory
+ */
+Datum
+pg_clear_collected_shared_advice(PG_FUNCTION_ARGS)
+{
+       pgpa_shared_state *state = pg_plan_advice_attach();
+       dsa_area   *area = pg_plan_advice_dsa_area();
+
+       /* Trimming modifies the shared collector, so lock exclusively. */
+       LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+
+       /*
+        * If we're not attached to the shared advice collector yet, fix that now;
+        * but if the collector doesn't even exist, we can return without doing
+        * anything else.
+        */
+       if (shared_collector == NULL)
+       {
+               if (state->shared_collector == InvalidDsaPointer)
+               {
+                       LWLockRelease(&state->lock);
+                       PG_RETURN_VOID();
+               }
+               shared_collector = dsa_get_address(area, state->shared_collector);
+       }
+
+       /* Do the real work: a limit of zero discards every entry. */
+       pgpa_trim_shared_advice(area, 0);
+
+       LWLockRelease(&state->lock);
+
+       PG_RETURN_VOID();
+}
+
+/*
+ * SQL-callable SRF returning the advice held in backend-local memory.
+ *
+ * One row is produced per retained entry, except that entries are omitted
+ * when member_can_set_role() reports that the current user cannot set role
+ * to the user recorded in the entry.
+ */
+Datum
+pg_get_collected_local_advice(PG_FUNCTION_ARGS)
+{
+       ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+       pgpa_local_advice *la = local_collector;
+       Oid                     userid = GetUserId();
+
+       InitMaterializedSRF(fcinfo, 0);
+
+       /* No local collector means an empty result set. */
+       if (la == NULL)
+               return (Datum) 0;
+
+       /* Visit every retained entry, oldest first. */
+       for (uint64 id = la->oldest_id; id < la->next_id; ++id)
+       {
+               /* Entries are located by their distance from base_id. */
+               uint64          position = id - la->base_id;
+               pgpa_collected_advice *ca;
+
+               ca = la->chunks[position / ADVICE_CHUNK_SIZE]
+                       ->entries[position % ADVICE_CHUNK_SIZE];
+
+               if (member_can_set_role(userid, ca->userid))
+               {
+                       Datum           values[PG_GET_ADVICE_COLUMNS];
+                       bool            nulls[PG_GET_ADVICE_COLUMNS] = {0};
+
+                       values[0] = UInt64GetDatum(id);
+                       values[1] = ObjectIdGetDatum(ca->userid);
+                       values[2] = ObjectIdGetDatum(ca->dbid);
+                       values[3] = UInt64GetDatum(ca->queryid);
+                       values[4] = TimestampGetDatum(ca->timestamp);
+                       values[5] = CStringGetTextDatum(query_string(ca));
+                       values[6] = CStringGetTextDatum(advice_string(ca));
+
+                       tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+                                                                values, nulls);
+               }
+       }
+
+       return (Datum) 0;
+}
+
+/*
+ * SQL-callable SRF to return advice collected in shared memory
+ *
+ * Emits one row per entry currently retained by the shared collector, using
+ * the same seven columns as pg_get_collected_local_advice. The shared state's
+ * lock is held in shared mode for as long as the entries are being read.
+ *
+ * NOTE(review): unlike pg_get_collected_local_advice, this function does not
+ * filter entries with member_can_set_role(); confirm that access is meant to
+ * be restricted by some other means (e.g. function privileges).
+ */
+Datum
+pg_get_collected_shared_advice(PG_FUNCTION_ARGS)
+{
+       ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+       pgpa_shared_state *state = pg_plan_advice_attach();
+       dsa_area   *area = pg_plan_advice_dsa_area();
+       dsa_pointer *chunk_array;
+       pgpa_shared_advice *sa = shared_collector;
+
+       InitMaterializedSRF(fcinfo, 0);
+
+       /* Lock the shared state. */
+       LWLockAcquire(&state->lock, LW_SHARED);
+
+       /*
+        * If we're not attached to the shared advice collector yet, fix that now;
+        * but if the collector doesn't even exist, we can return without doing
+        * anything else.
+        */
+       if (sa == NULL)
+       {
+               if (state->shared_collector == InvalidDsaPointer)
+               {
+                       LWLockRelease(&state->lock);
+                       return (Datum) 0;
+               }
+               /* Remember the address in the backend-local variable as well. */
+               shared_collector = sa = dsa_get_address(area, state->shared_collector);
+       }
+
+       /* Get a pointer to the chunk array. */
+       chunk_array = dsa_get_address(area, sa->chunks);
+
+       /* Loop over all entries. */
+       for (uint64 id = sa->oldest_id; id < sa->next_id; ++id)
+       {
+               uint64          chunk_number;
+               uint64          chunk_offset;
+               pgpa_shared_advice_chunk *chunk;
+               pgpa_collected_advice *ca;
+               Datum           values[PG_GET_ADVICE_COLUMNS];
+               bool            nulls[PG_GET_ADVICE_COLUMNS] = {0};
+
+               /* Entries are located by their distance from base_id. */
+               chunk_number = (id - sa->base_id) / ADVICE_CHUNK_SIZE;
+               chunk_offset = (id - sa->base_id) % ADVICE_CHUNK_SIZE;
+
+               /* Both the chunk and the entry are stored as DSA pointers. */
+               chunk = dsa_get_address(area, chunk_array[chunk_number]);
+               ca = dsa_get_address(area, chunk->entries[chunk_offset]);
+
+               values[0] = UInt64GetDatum(id);
+               values[1] = ObjectIdGetDatum(ca->userid);
+               values[2] = ObjectIdGetDatum(ca->dbid);
+               values[3] = UInt64GetDatum(ca->queryid);
+               values[4] = TimestampGetDatum(ca->timestamp);
+               values[5] = CStringGetTextDatum(query_string(ca));
+               values[6] = CStringGetTextDatum(advice_string(ca));
+
+               tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+                                                        values, nulls);
+       }
+
+       /* Release lock on shared state. */
+       LWLockRelease(&state->lock);
+
+       return (Datum) 0;
+}
diff --git a/contrib/pg_plan_advice/pgpa_collector.h b/contrib/pg_plan_advice/pgpa_collector.h

new file mode 100644 (file)

index 0000000..b6e746a
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_collector.h
@@ -0,0 +1,18 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_collector.h
+ *       collect advice into backend-local or shared memory
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_collector.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_COLLECTOR_H
+#define PGPA_COLLECTOR_H
+
+/*
+ * Entry point for handing a query's advice to the collector.
+ *
+ * NOTE(review): presumably records advice_string, together with queryId and
+ * query_string, in backend-local and/or shared memory as the file header
+ * describes; confirm against the definition in pgpa_collector.c.
+ */
+extern void pgpa_collect_advice(uint64 queryId, const char *query_string,
+                                                               const char *advice_string);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_identifier.c b/contrib/pg_plan_advice/pgpa_identifier.c

new file mode 100644 (file)

index 0000000..2fa8075
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_identifier.c
@@ -0,0 +1,476 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_identifier.c
+ *       create appropriate identifiers for range table entries
+ *
+ * The goal of this module is to be able to produce identifiers for range
+ * table entries that are unique, understandable to human beings, and
+ * able to be reconstructed during future planning cycles. As an
+ * exception, we do not care about, or want to produce, identifiers for
+ * RTE_JOIN entries. This is because (1) we would end up with a ton of
+ * RTEs with unhelpful names like unnamed_join_17; (2) not all joins have
+ * RTEs; and (3) we intend to refer to joins by their constituent members
+ * rather than by reference to the join RTE.
+ *
+ * In general, we construct identifiers of the following form:
+ *
+ * alias_name#occurrence_number/child_table_name@subquery_name
+ *
+ * However, occurrence_number is omitted when it is the first occurrence
+ * within the same subquery, child_table_name is omitted for relations that
+ * are not child tables, and subquery_name is omitted for the topmost
+ * query level. Whenever an item is omitted, the preceding punctuation mark
+ * is also omitted.  Identifier-style escaping is applied to alias_name and
+ * subquery_name.  Whenever we include child_table_name, we always emit the
+ * schema-qualified name, but users writing their own plan advice are not
+ * required to do so.  Identifier-style escaping is applied to the schema and
+ * to the relation names separately.
+ *
+ * The upshot of all of these rules is that in simple cases, the relation
+ * identifier is textually identical to the alias name, making life easier
+ * for users. However, even in complex cases, every relation identifier
+ * for a given query will be unique (or at least we hope so: if not, this
+ * code is buggy and the identifier format might need to be rethought).
+ *
+ * A key goal of this system is that we want to be able to reconstruct the
+ * same identifiers during a future planning cycle for the same query, so
+ * that if a certain behavior is specified for a certain identifier, we can
+ * properly identify the RTI for which that behavior is mandated. In order
+ * for this to work, subquery names must be unique and known before the
+ * subquery is planned, and the remainder of the identifier must not depend
+ * on any part of the query outside of the current subquery level. In
+ * particular, occurrence_number must be calculated relative to the range
+ * table for the relevant subquery, not the final flattened range table.
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_identifier.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pgpa_identifier.h"
+
+#include "parser/parsetree.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+
+static Index *pgpa_create_top_rti_map(Index rtable_length, List *rtable,
+                                                                         List *appinfos);
+static int     pgpa_occurrence_number(List *rtable, Index *top_rti_map,
+                                                                  SubPlanRTInfo *rtinfo, Index rti);
+
+/*
+ * Create a range table identifier from scratch.
+ *
+ * This function leaves the caller to do all the heavy lifting, so it's
+ * generally better to use one of the functions below instead.
+ *
+ * The output has the form alias_name#occurrence/partnsp.partrel@plan_name,
+ * where "#occurrence" is emitted only when occurrence is greater than 1,
+ * the "/..." part only when partrel is set (with the schema omitted when
+ * partnsp is NULL), and "@plan_name" only when plan_name is set.
+ *
+ * See the file header comments for more details on the format of an
+ * identifier.
+ */
+const char *
+pgpa_identifier_string(const pgpa_identifier *rid)
+{
+       const char *result;
+
+       Assert(rid->alias_name != NULL);
+       result = quote_identifier(rid->alias_name);
+
+       Assert(rid->occurrence >= 0);
+       if (rid->occurrence > 1)
+               result = psprintf("%s#%d", result, rid->occurrence);
+
+       if (rid->partrel != NULL)
+       {
+               /*
+                * With no schema available, emit just the relation name. (Quoting
+                * partnsp here would pass a NULL pointer to quote_identifier.)
+                */
+               if (rid->partnsp == NULL)
+                       result = psprintf("%s/%s", result,
+                                                         quote_identifier(rid->partrel));
+               else
+                       result = psprintf("%s/%s.%s", result,
+                                                         quote_identifier(rid->partnsp),
+                                                         quote_identifier(rid->partrel));
+       }
+
+       if (rid->plan_name != NULL)
+               result = psprintf("%s@%s", result, quote_identifier(rid->plan_name));
+
+       return result;
+}
+
+/*
+ * Compute a relation identifier for a particular RTI.
+ *
+ * The caller provides root and rti, and gets the necessary details back via
+ * the fields of *rid, all of which are overwritten.
+ *
+ * The alias_name and plan_name stored into *rid are not copied: alias_name
+ * points into the range table entry and plan_name is root->plan_name.
+ */
+void
+pgpa_compute_identifier_by_rti(PlannerInfo *root, Index rti,
+                                                          pgpa_identifier *rid)
+{
+       Index           top_rti = rti;
+       int                     occurrence = 1;
+       RangeTblEntry *rte;
+       RangeTblEntry *top_rte;
+       char       *partnsp = NULL;
+       char       *partrel = NULL;
+
+       /*
+        * If this is a child RTE, find the topmost parent that is still of type
+        * RTE_RELATION. We do this because we identify children of partitioned
+        * tables by the name of the child table, but subqueries can also have
+        * child rels and we don't care about those here.
+        */
+       for (;;)
+       {
+               AppendRelInfo *appinfo;
+               RangeTblEntry *parent_rte;
+
+               /* append_rel_array can be NULL if there are no children */
+               if (root->append_rel_array == NULL ||
+                       (appinfo = root->append_rel_array[top_rti]) == NULL)
+                       break;
+
+               /* Stop climbing once the parent is something other than a table. */
+               parent_rte = planner_rt_fetch(appinfo->parent_relid, root);
+               if (parent_rte->rtekind != RTE_RELATION)
+                       break;
+
+               top_rti = appinfo->parent_relid;
+       }
+
+       /* Get the range table entries for the RTI and top RTI. */
+       rte = planner_rt_fetch(rti, root);
+       top_rte = planner_rt_fetch(top_rti, root);
+       Assert(rte->rtekind != RTE_JOIN);
+       Assert(top_rte->rtekind != RTE_JOIN);
+
+       /*
+        * Work out the correct occurrence number, by counting the qualifying
+        * entries that precede top_rti in the range table.
+        */
+       for (Index prior_rti = 1; prior_rti < top_rti; ++prior_rti)
+       {
+               RangeTblEntry *prior_rte;
+               AppendRelInfo *appinfo;
+
+               /*
+                * If this is a child rel of a parent that is a relation, skip it.
+                *
+                * Such range table entries are disambiguated by mentioning the schema
+                * and name of the table, not by counting them as separate occurrences
+                * of the same table.
+                *
+                * NB: append_rel_array can be NULL if there are no children
+                */
+               if (root->append_rel_array != NULL &&
+                       (appinfo = root->append_rel_array[prior_rti]) != NULL)
+               {
+                       RangeTblEntry *parent_rte;
+
+                       parent_rte = planner_rt_fetch(appinfo->parent_relid, root);
+                       if (parent_rte->rtekind == RTE_RELATION)
+                               continue;
+               }
+
+               /* Skip NULL entries and joins. */
+               prior_rte = planner_rt_fetch(prior_rti, root);
+               if (prior_rte == NULL || prior_rte->rtekind == RTE_JOIN)
+                       continue;
+
+               /*
+                * Skip if the alias name differs.
+                *
+                * NOTE(review): this compares against rte's alias, while the result
+                * reports top_rte's alias; presumably a child RTE always carries the
+                * same alias name as its parent, making the two equivalent. Confirm.
+                */
+               if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
+                       continue;
+
+               /* Looks like a true duplicate. */
+               ++occurrence;
+       }
+
+       /* If this is a child table, get the schema and relation names. */
+       if (rti != top_rti)
+       {
+               partnsp = get_namespace_name_or_temp(get_rel_namespace(rte->relid));
+               partrel = get_rel_name(rte->relid);
+       }
+
+       /* OK, we have all the answers we need. Return them to the caller. */
+       rid->alias_name = top_rte->eref->aliasname;
+       rid->occurrence = occurrence;
+       rid->partnsp = partnsp;
+       rid->partrel = partrel;
+       rid->plan_name = root->plan_name;
+}
+
+/*
+ * Compute relation identifiers for every member of relids that is not an
+ * RTE_JOIN entry.
+ *
+ * Join RTEs are passed over because plan advice cannot mention them.
+ *
+ * Results are written to consecutive elements of rids, starting at index
+ * zero; the caller must supply an array large enough to hold them all.
+ *
+ * Returns the number of identifiers that were stored.
+ */
+int
+pgpa_compute_identifiers_by_relids(PlannerInfo *root, Bitmapset *relids,
+                                                                  pgpa_identifier *rids)
+{
+       int                     nidentifiers = 0;
+
+       for (int member = bms_next_member(relids, -1);
+                member >= 0;
+                member = bms_next_member(relids, member))
+       {
+               RangeTblEntry *member_rte = planner_rt_fetch(member, root);
+
+               /* Joins never get identifiers. */
+               if (member_rte->rtekind != RTE_JOIN)
+               {
+                       pgpa_compute_identifier_by_rti(root, member,
+                                                                                  &rids[nidentifiers]);
+                       ++nidentifiers;
+               }
+       }
+
+       Assert(nidentifiers > 0);
+       return nidentifiers;
+}
+
+/*
+ * Create an array of range table identifiers for all the non-NULL,
+ * non-RTE_JOIN entries in the PlannedStmt's range table.
+ *
+ * The returned array has one element per range table entry and is indexed
+ * by RTI - 1. Elements for entries that are skipped are left zeroed, so
+ * their alias_name is NULL.
+ */
+pgpa_identifier *
+pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt)
+{
+       Index           rtable_length = list_length(pstmt->rtable);
+       pgpa_identifier *result = palloc0_array(pgpa_identifier, rtable_length);
+       Index      *top_rti_map;
+       int                     rtinfoindex = 0;
+       SubPlanRTInfo *rtinfo = NULL;
+       SubPlanRTInfo *nextrtinfo = NULL;
+
+       /*
+        * Account for relations added by inheritance expansion of partitioned
+        * tables.
+        */
+       top_rti_map = pgpa_create_top_rti_map(rtable_length, pstmt->rtable,
+                                                                                 pstmt->appendRelations);
+
+       /*
+        * When we begin iterating, we're processing the portion of the range
+        * table that originated from the top-level PlannerInfo, so rtinfo is
+        * NULL. Later, rtinfo will be the SubPlanRTInfo for the subquery whose
+        * portion of the range table we are processing. nextrtinfo is always the
+        * SubPlanRTInfo that follows the current one, if any, so when we're
+        * processing the top-level query's portion of the range table, the next
+        * SubPlanRTInfo is the very first one.
+        */
+       if (pstmt->subrtinfos != NULL)
+               nextrtinfo = linitial(pstmt->subrtinfos);
+
+       /* Main loop over the range table. */
+       for (Index rti = 1; rti <= rtable_length; rti++)
+       {
+               const char *plan_name;
+               Index           top_rti;
+               RangeTblEntry *rte;
+               RangeTblEntry *top_rte;
+               char       *partnsp = NULL;
+               char       *partrel = NULL;
+               int                     occurrence;
+               pgpa_identifier *rid;
+
+               /*
+                * Advance to the next SubPlanRTInfo, if it's time to do that.
+                *
+                * This loop probably shouldn't ever iterate more than once, because
+                * that would imply that a subquery was planned but added nothing to
+                * the range table; but let's be defensive and assume it can happen.
+                */
+               while (nextrtinfo != NULL && rti > nextrtinfo->rtoffset)
+               {
+                       rtinfo = nextrtinfo;
+                       if (++rtinfoindex >= list_length(pstmt->subrtinfos))
+                               nextrtinfo = NULL;
+                       else
+                               nextrtinfo = list_nth(pstmt->subrtinfos, rtinfoindex);
+               }
+
+               /* Fetch the range table entry, if any. */
+               rte = rt_fetch(rti, pstmt->rtable);
+
+               /*
+                * We can't and don't need to identify null entries, and we don't want
+                * to identify join entries.
+                */
+               if (rte == NULL || rte->rtekind == RTE_JOIN)
+                       continue;
+
+               /*
+                * If this is not a relation added by partitioned table expansion,
+                * then the top RTI/RTE are just the same as this RTI/RTE. Otherwise,
+                * we need the information for the top RTI/RTE, and must also fetch
+                * the partition schema and name.
+                */
+               top_rti = top_rti_map[rti - 1];
+               if (rti == top_rti)
+                       top_rte = rte;
+               else
+               {
+                       top_rte = rt_fetch(top_rti, pstmt->rtable);
+                       partnsp =
+                               get_namespace_name_or_temp(get_rel_namespace(rte->relid));
+                       partrel = get_rel_name(rte->relid);
+               }
+
+               /*
+                * Compute the correct occurrence number. Note that this is done for
+                * top_rti rather than rti.
+                */
+               occurrence = pgpa_occurrence_number(pstmt->rtable, top_rti_map,
+                                                                                       rtinfo, top_rti);
+
+               /* Get the name of the current plan (NULL for toplevel query). */
+               plan_name = rtinfo == NULL ? NULL : rtinfo->plan_name;
+
+               /* Save all the details we've derived. */
+               rid = &result[rti - 1];
+               rid->alias_name = top_rte->eref->aliasname;
+               rid->occurrence = occurrence;
+               rid->partnsp = partnsp;
+               rid->partrel = partrel;
+               rid->plan_name = plan_name;
+       }
+
+       return result;
+}
+
+/*
+ * Look up a pgpa_identifier in the array of identifiers computed for the
+ * range table.
+ *
+ * rt_identifiers is indexed by RTI - 1 and has rtable_length elements.
+ * The return value is the RTI of the single matching element; if nothing
+ * matches, or if more than one element matches, the return value is 0.
+ */
+Index
+pgpa_compute_rti_from_identifier(int rtable_length,
+                                                                pgpa_identifier *rt_identifiers,
+                                                                pgpa_identifier *rid)
+{
+       Index           match = 0;
+
+       for (Index rti = 1; rti <= rtable_length; ++rti)
+       {
+               pgpa_identifier *candidate = &rt_identifiers[rti - 1];
+
+               /* Slots with no alias name hold no identifier at all. */
+               if (candidate->alias_name == NULL)
+                       continue;
+
+               /* Alias name and occurrence number must agree exactly. */
+               if (strcmp(rid->alias_name, candidate->alias_name) != 0)
+                       continue;
+               if (rid->occurrence != candidate->occurrence)
+                       continue;
+
+               /* A partition schema omitted on either side matches anything. */
+               if (rid->partnsp != NULL && candidate->partnsp != NULL &&
+                       strcmp(rid->partnsp, candidate->partnsp) != 0)
+                       continue;
+
+               /* Partition and plan names must be equal, or absent on both sides. */
+               if (!strings_equal_or_both_null(rid->partrel, candidate->partrel))
+                       continue;
+               if (!strings_equal_or_both_null(rid->plan_name, candidate->plan_name))
+                       continue;
+
+               /* A second match makes the identifier ambiguous. */
+               if (match != 0)
+                       return 0;
+               match = rti;
+       }
+
+       return match;
+}
+
+/*
+ * Build an array that maps every RTI to the RTI whose alias_name is to be
+ * used when constructing the range table identifier.
+ *
+ * A child relation maps to its topmost ancestor that is still of type
+ * RTE_RELATION; every other relation maps to itself.
+ *
+ * All of the answers are computed in one pass over the AppendRelInfo list,
+ * since they will eventually be needed for every RTI and searching the list
+ * separately for each one would be wasteful.
+ *
+ * The returned array uses zero-based indexing whereas RTIs are 1-based, so
+ * the entry for a given RTI lives at index RTI - 1.
+ */
+static Index *
+pgpa_create_top_rti_map(Index rtable_length, List *rtable, List *appinfos)
+{
+       Index      *map = palloc0_array(Index, rtable_length);
+
+       /* Start with the identity mapping. */
+       for (Index slot = 0; slot < rtable_length; ++slot)
+               map[slot] = slot + 1;
+
+       /*
+        * Redirect each child of an RTE_RELATION parent to wherever the parent
+        * already points. Parents always precede their children in the
+        * AppendRelInfo list, so the parent's entry is final by the time it is
+        * read here.
+        */
+       foreach_node(AppendRelInfo, appinfo, appinfos)
+       {
+               RangeTblEntry *parent_rte = rt_fetch(appinfo->parent_relid, rtable);
+
+               if (parent_rte->rtekind == RTE_RELATION)
+                       map[appinfo->child_relid - 1] = map[appinfo->parent_relid - 1];
+       }
+
+       return map;
+}
+
+/*
+ * Find the occurrence number of a certain relation within a certain subquery.
+ *
+ * The same alias name can occur multiple times within a subquery, but we want
+ * to disambiguate by giving different occurrences different integer indexes.
+ * However, child tables are disambiguated by including the table name rather
+ * than by incrementing the occurrence number; and joins are not named and so
+ * shouldn't increment the occurrence number either.
+ *
+ * rtinfo identifies the subquery whose portion of the flattened range table
+ * contains rti, or is NULL for the top-level query. Only entries after that
+ * portion's starting offset and before rti are examined. top_rti_map is
+ * indexed by RTI - 1.
+ */
+static int
+pgpa_occurrence_number(List *rtable, Index *top_rti_map,
+                                          SubPlanRTInfo *rtinfo, Index rti)
+{
+       Index           rtoffset = (rtinfo == NULL) ? 0 : rtinfo->rtoffset;
+       int                     occurrence = 1;
+       RangeTblEntry *rte = rt_fetch(rti, rtable);
+
+       for (Index prior_rti = rtoffset + 1; prior_rti < rti; ++prior_rti)
+       {
+               RangeTblEntry *prior_rte;
+
+               /*
+                * If this is a child rel of a parent that is a relation, skip it.
+                *
+                * Such range table entries are disambiguated by mentioning the schema
+                * and name of the table, not by counting them as separate occurrences
+                * of the same table.
+                *
+                * We must keep scanning afterwards: entries with the same alias may
+                * still appear between this child and rti.
+                */
+               if (top_rti_map[prior_rti - 1] != prior_rti)
+                       continue;
+
+               /* Skip NULL entries and joins. */
+               prior_rte = rt_fetch(prior_rti, rtable);
+               if (prior_rte == NULL || prior_rte->rtekind == RTE_JOIN)
+                       continue;
+
+               /* Skip if the alias name differs. */
+               if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
+                       continue;
+
+               /* Looks like a true duplicate. */
+               ++occurrence;
+       }
+
+       return occurrence;
+}
diff --git a/contrib/pg_plan_advice/pgpa_identifier.h b/contrib/pg_plan_advice/pgpa_identifier.h

new file mode 100644 (file)

index 0000000..b000d2b
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_identifier.h
@@ -0,0 +1,52 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_identifier.h
+ *       create appropriate identifiers for range table entries
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_identifier.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef PGPA_IDENTIFIER_H
+#define PGPA_IDENTIFIER_H
+
+#include "nodes/pathnodes.h"
+#include "nodes/plannodes.h"
+
+/*
+ * The components of a range table identifier.
+ *
+ * pgpa_identifier_string() renders these as
+ * alias_name#occurrence/partnsp.partrel@plan_name, leaving out the parts
+ * that do not apply.
+ */
+typedef struct pgpa_identifier
+{
+       /* alias name of the relation; must be set for a valid identifier */
+       const char *alias_name;
+       /* occurrence number; only values above 1 appear in the string form */
+       int                     occurrence;
+       /* schema of the child table; may be NULL even when partrel is set */
+       const char *partnsp;
+       /* name of the child table, or NULL if there is no child table part */
+       const char *partrel;
+       /* name of the subquery's plan, or NULL if there is none */
+       const char *plan_name;
+} pgpa_identifier;
+
+/*
+ * Compare two strings, either of which may be NULL.
+ *
+ * Returns true if both are NULL or if both are non-NULL with identical
+ * contents, and false otherwise.
+ */
+static inline bool
+strings_equal_or_both_null(const char *a, const char *b)
+{
+       /* Identical pointers, including two NULLs, need no further comparison. */
+       return a == b || (a != NULL && b != NULL && strcmp(a, b) == 0);
+}
+
+extern const char *pgpa_identifier_string(const pgpa_identifier *rid);
+extern void pgpa_compute_identifier_by_rti(PlannerInfo *root, Index rti,
+                                                                                  pgpa_identifier *rid);
+extern int     pgpa_compute_identifiers_by_relids(PlannerInfo *root,
+                                                                                          Bitmapset *relids,
+                                                                                          pgpa_identifier *rids);
+extern pgpa_identifier *pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt);
+
+extern Index pgpa_compute_rti_from_identifier(int rtable_length,
+                                                                                         pgpa_identifier *rt_identifiers,
+                                                                                         pgpa_identifier *rid);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_join.c b/contrib/pg_plan_advice/pgpa_join.c

new file mode 100644 (file)

index 0000000..2861876
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_join.c
@@ -0,0 +1,615 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_join.c
+ *       analysis of joins in Plan trees
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_join.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pgpa_join.h"
+#include "pgpa_scan.h"
+#include "pgpa_walker.h"
+
+#include "nodes/pathnodes.h"
+#include "nodes/print.h"
+#include "parser/parsetree.h"
+
+/*
+ * Temporary object used when unrolling a join tree.
+ *
+ * pgpa_unroll_join appends one entry to each of the per-join arrays for
+ * every join it records, so index 0 describes the first (topmost) join seen
+ * and higher indexes describe joins found deeper on the outer side.
+ * pgpa_build_unrolled_join reverses that order when building its result.
+ */
+struct pgpa_join_unroller
+{
+       /* Allocated length of each of the per-join arrays below. */
+       unsigned        nallocated;
+       /* Number of joins recorded so far, i.e. array entries in use. */
+       unsigned        nused;
+       /*
+        * Final outer input and its elided node (if any), set once the outer
+        * side of a recorded join is not itself an unrollable join.
+        */
+       Plan       *outer_subplan;
+       ElidedNode *outer_elided_node;
+       /* True if the final outer input lies beneath a Gather/Gather Merge. */
+       bool            outer_beneath_any_gather;
+       /* Per-join arrays: join strategy, real inner input, its elided node. */
+       pgpa_join_strategy *strategy;
+       Plan      **inner_subplans;
+       ElidedNode **inner_elided_nodes;
+       /* Nested unroller for an inner input that is itself a join, else NULL. */
+       pgpa_join_unroller **inner_unrollers;
+       /* Per-join: is the inner input beneath a Gather/Gather Merge? */
+       bool       *inner_beneath_any_gather;
+};
+
+static pgpa_join_strategy pgpa_decompose_join(pgpa_plan_walker_context *walker,
+                                                                                         Plan *plan,
+                                                                                         Plan **realouter,
+                                                                                         Plan **realinner,
+                                                                                         ElidedNode **elidedrealouter,
+                                                                                         ElidedNode **elidedrealinner,
+                                                                                         bool *found_any_outer_gather,
+                                                                                         bool *found_any_inner_gather);
+static ElidedNode *pgpa_descend_node(PlannedStmt *pstmt, Plan **plan);
+static ElidedNode *pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan,
+                                                                                  bool *found_any_gather);
+static bool pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan,
+                                                                       ElidedNode **elided_node);
+
+static bool is_result_node_with_child(Plan *plan);
+static bool is_sorting_plan(Plan *plan);
+
+/*
+ * Allocate a fresh join unroller with no joins recorded in it.
+ *
+ * The returned object is meant to be passed to pgpa_unroll_join one or more
+ * times and then turned into a pgpa_unrolled_join. The per-join arrays start
+ * out with room for a few entries; pgpa_unroll_join enlarges them on demand.
+ */
+pgpa_join_unroller *
+pgpa_create_join_unroller(void)
+{
+       const unsigned initial_slots = 4;
+       pgpa_join_unroller *unroller = palloc0_object(pgpa_join_unroller);
+
+       /* palloc0_object already zeroed nused and the outer_* fields. */
+       unroller->nallocated = initial_slots;
+       unroller->strategy = palloc_array(pgpa_join_strategy, initial_slots);
+       unroller->inner_subplans = palloc_array(Plan *, initial_slots);
+       unroller->inner_elided_nodes = palloc_array(ElidedNode *, initial_slots);
+       unroller->inner_unrollers =
+               palloc_array(pgpa_join_unroller *, initial_slots);
+       unroller->inner_beneath_any_gather = palloc_array(bool, initial_slots);
+
+       return unroller;
+}
+
+/*
+ * Process one plan node of a join tree on behalf of a join unroller.
+ *
+ * The goal is to turn join trees, as they appear in the Plan tree, into a
+ * simpler and more regular structure that is easier to process further.
+ * Unrolling is outer-deep: for Join1(Join2(A,B),Join3(C,D)), Join1 and Join2
+ * are recorded in the same join unroller, while Join3 needs an unroller of
+ * its own because it is a join within the *inner* side of another join.
+ *
+ * pgpa_plan_walker creates a "top level" join unroller when it encounters a
+ * join in a part of the plan tree where no join unroller is active yet. From
+ * there, this function is responsible for determining to what portion of the
+ * plan tree that unroller applies, and for creating whatever subordinate
+ * unrollers non-outer-deep join trees require. The unrollers to be used when
+ * traversing the outer and inner subtrees of this plan node are reported via
+ * *outer_join_unroller and *inner_join_unroller, respectively.
+ *
+ * Note that *outer_join_unroller is written only when traversal of the outer
+ * subtree should continue with this same unroller, and *inner_join_unroller
+ * only when plan is an actual join; callers presumably initialize both to
+ * NULL beforehand.
+ */
+void
+pgpa_unroll_join(pgpa_plan_walker_context *walker, Plan *plan,
+                                bool beneath_any_gather,
+                                pgpa_join_unroller *join_unroller,
+                                pgpa_join_unroller **outer_join_unroller,
+                                pgpa_join_unroller **inner_join_unroller)
+{
+       pgpa_join_strategy strategy;
+       pgpa_join_unroller *nested = NULL;
+       Plan       *inner_input;
+       Plan       *outer_input;
+       ElidedNode *inner_elided;
+       ElidedNode *outer_elided;
+       bool            outer_gather = false;
+       bool            inner_gather = false;
+       bool            pass_through;
+       int                     slot;
+
+       Assert(join_unroller != NULL);
+
+       /*
+        * Some plan nodes are neither scans nor joins, yet can show up inside a
+        * join tree. For those, the same unroller simply continues to apply to
+        * the node's child:
+        *
+        * (1) Materialize, Memoize, and Hash nodes, which are part of the join
+        * strategy,
+        *
+        * (2) Gather and Gather Merge nodes, which can occur at any point in the
+        * join tree where the planner decided to initiate parallelism,
+        *
+        * (3) Sort and IncrementalSort nodes, which can occur beneath MergeJoin
+        * or GatherMerge,
+        *
+        * (4) Agg and Unique nodes, which can occur when we decide to make the
+        * nullable side of a semijoin unique and then join the result, and
+        *
+        * (5) Result nodes with children, which can be added either to project
+        * or to enforce a one-time filter (a Result node without a child is a
+        * degenerate scan or join instead).
+        */
+       switch (nodeTag(plan))
+       {
+               case T_Material:
+               case T_Memoize:
+               case T_Hash:
+               case T_Gather:
+               case T_GatherMerge:
+               case T_Sort:
+               case T_IncrementalSort:
+               case T_Agg:
+               case T_Unique:
+                       pass_through = true;
+                       break;
+               case T_Result:
+                       pass_through = is_result_node_with_child(plan);
+                       break;
+               default:
+                       pass_through = false;
+                       break;
+       }
+
+       if (pass_through)
+       {
+               *outer_join_unroller = join_unroller;
+               return;
+       }
+
+       /*
+        * Anything that was not passed through above is expected to be an
+        * unrollable join, so find its strategy and its real inputs.
+        */
+       strategy = pgpa_decompose_join(walker, plan,
+                                                                  &outer_input, &inner_input,
+                                                                  &outer_elided, &inner_elided,
+                                                                  &outer_gather,
+                                                                  &inner_gather);
+
+       /* Double the size of every per-join array once they are all full. */
+       if (join_unroller->nused >= join_unroller->nallocated)
+       {
+               unsigned        newsize;
+
+               join_unroller->nallocated *= 2;
+               newsize = join_unroller->nallocated;
+
+               join_unroller->strategy =
+                       repalloc_array(join_unroller->strategy,
+                                                  pgpa_join_strategy, newsize);
+               join_unroller->inner_subplans =
+                       repalloc_array(join_unroller->inner_subplans,
+                                                  Plan *, newsize);
+               join_unroller->inner_elided_nodes =
+                       repalloc_array(join_unroller->inner_elided_nodes,
+                                                  ElidedNode *, newsize);
+               join_unroller->inner_beneath_any_gather =
+                       repalloc_array(join_unroller->inner_beneath_any_gather,
+                                                  bool, newsize);
+               join_unroller->inner_unrollers =
+                       repalloc_array(join_unroller->inner_unrollers,
+                                                  pgpa_join_unroller *, newsize);
+       }
+
+       /*
+        * Outer side. Because outer-deep join trees are being flattened, an
+        * outer input that is itself an unrollable join continues to use this
+        * unroller. Otherwise this is as far as this unroller reaches, and the
+        * outer input is remembered as the final outer subplan.
+        */
+       if (outer_elided == NULL && pgpa_is_join(outer_input))
+               *outer_join_unroller = join_unroller;
+       else
+       {
+               join_unroller->outer_subplan = outer_input;
+               join_unroller->outer_elided_node = outer_elided;
+               join_unroller->outer_beneath_any_gather =
+                       beneath_any_gather || outer_gather;
+       }
+
+       /*
+        * Inner side. An inner input that is itself an unrollable join must be
+        * flattened too, but into a brand-new unroller rather than this one.
+        */
+       if (inner_elided == NULL && pgpa_is_join(inner_input))
+               nested = pgpa_create_join_unroller();
+
+       slot = join_unroller->nused++;
+       join_unroller->strategy[slot] = strategy;
+       join_unroller->inner_subplans[slot] = inner_input;
+       join_unroller->inner_elided_nodes[slot] = inner_elided;
+       join_unroller->inner_beneath_any_gather[slot] =
+               beneath_any_gather || inner_gather;
+       join_unroller->inner_unrollers[slot] = nested;
+       *inner_join_unroller = nested;
+}
+
+/*
+ * Use the data we've accumulated in a pgpa_join_unroller object to construct
+ * a pgpa_unrolled_join.
+ *
+ * Nested join unrollers recorded for inner inputs are converted recursively.
+ * The join unroller itself is not modified or freed.
+ */
+pgpa_unrolled_join *
+pgpa_build_unrolled_join(pgpa_plan_walker_context *walker,
+                                                pgpa_join_unroller *join_unroller)
+{
+       pgpa_unrolled_join *ujoin;
+       int                     i;
+
+       /*
+        * We shouldn't have gone even so far as to create a join unroller unless
+        * we found at least one unrollable join.
+        */
+       Assert(join_unroller->nused > 0);
+
+       /* Allocate result structures. */
+       ujoin = palloc0_object(pgpa_unrolled_join);
+       ujoin->ninner = join_unroller->nused;
+       ujoin->strategy = palloc0_array(pgpa_join_strategy, join_unroller->nused);
+       ujoin->inner = palloc0_array(pgpa_join_member, join_unroller->nused);
+
+       /* Handle the outermost join. */
+       ujoin->outer.plan = join_unroller->outer_subplan;
+       ujoin->outer.elided_node = join_unroller->outer_elided_node;
+       ujoin->outer.scan =
+               pgpa_build_scan(walker, ujoin->outer.plan,
+                                               ujoin->outer.elided_node,
+                                               join_unroller->outer_beneath_any_gather,
+                                               true);
+
+       /*
+        * We want the joins from the deepest part of the plan tree to appear
+        * first in the result object, but the join unroller adds them in exactly
+        * the reverse of that order, so we need to flip the order of the arrays
+        * when constructing the final result.
+        */
+       for (i = 0; i < join_unroller->nused; ++i)
+       {
+               int                     k = join_unroller->nused - i - 1;
+
+               /* Copy strategy, Plan, and ElidedNode. */
+               ujoin->strategy[i] = join_unroller->strategy[k];
+               ujoin->inner[i].plan = join_unroller->inner_subplans[k];
+               ujoin->inner[i].elided_node = join_unroller->inner_elided_nodes[k];
+
+               /*
+                * Fill in remaining details, using either the nested join unroller,
+                * or by deriving them from the plan and elided nodes.
+                *
+                * Every unroller array must be read at index k, not i: result
+                * slot i corresponds to unroller slot k, and that includes the
+                * beneath-any-gather flag.
+                */
+               if (join_unroller->inner_unrollers[k] != NULL)
+                       ujoin->inner[i].unrolled_join =
+                               pgpa_build_unrolled_join(walker,
+                                                                                join_unroller->inner_unrollers[k]);
+               else
+                       ujoin->inner[i].scan =
+                               pgpa_build_scan(walker, ujoin->inner[i].plan,
+                                                               ujoin->inner[i].elided_node,
+                                                               join_unroller->inner_beneath_any_gather[k],
+                                                               true);
+       }
+
+       return ujoin;
+}
+
+/*
+ * Free memory allocated for pgpa_join_unroller.
+ *
+ * This releases the unroller and all of its per-join arrays. Nested join
+ * unrollers referenced from inner_unrollers are not freed here.
+ * NOTE(review): presumably the caller destroys those separately; confirm.
+ */
+void
+pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller)
+{
+       pfree(join_unroller->strategy);
+       pfree(join_unroller->inner_subplans);
+       pfree(join_unroller->inner_elided_nodes);
+       pfree(join_unroller->inner_unrollers);
+       /* Allocated alongside the other arrays, so it must be released too. */
+       pfree(join_unroller->inner_beneath_any_gather);
+       pfree(join_unroller);
+}
+
+/*
+ * Identify the join strategy used by a join and the "real" inner and outer
+ * plans.
+ *
+ * For example, a Hash Join always has a Hash node on the inner side, but
+ * for all intents and purposes the real inner input is the Hash node's child,
+ * not the Hash node itself.
+ *
+ * Likewise, a Merge Join may have a Sort node on the inner or outer side; if
+ * it does, the real input to the join is the Sort node's child, not the
+ * Sort node itself.
+ *
+ * In addition, with a Merge Join or a Nested Loop, the join planning code
+ * may add additional nodes such as Materialize or Memoize. We regard these
+ * as an aspect of the join strategy. As in the previous cases, the true input
+ * to the join is the underlying node.
+ *
+ * However, if any involved child node previously had a now-elided node stacked
+ * on top, then we can't "look through" that node -- indeed, what's going to be
+ * relevant for our purposes is the ElidedNode on top of that plan node, rather
+ * than the plan node itself.
+ *
+ * If there are multiple elided nodes, we want the one that would have been
+ * uppermost in the plan tree prior to setrefs processing; we expect to find
+ * that one last in the list of elided nodes.
+ *
+ * On return *realouter and *realinner will have been set to the real outer
+ * and real inner plans that we identified, and *elidedrealouter and
+ * *elidedrealinner to the last of any corresponding elided nodes.
+ * Additionally, *found_any_outer_gather and *found_any_inner_gather will
+ * be set to true if we looked through a Gather or Gather Merge node on
+ * that side of the join, and false otherwise.
+ *
+ * As a side effect, semijoin-related features may be registered with the
+ * walker via pgpa_add_future_feature. Raises an error if plan is not a
+ * MergeJoin, NestLoop, or HashJoin.
+ */
+static pgpa_join_strategy
+pgpa_decompose_join(pgpa_plan_walker_context *walker, Plan *plan,
+                                       Plan **realouter, Plan **realinner,
+                                       ElidedNode **elidedrealouter, ElidedNode **elidedrealinner,
+                                       bool *found_any_outer_gather, bool *found_any_inner_gather)
+{
+       PlannedStmt *pstmt = walker->pstmt;
+       JoinType        jointype = ((Join *) plan)->jointype;
+       Plan       *outerplan = plan->lefttree;
+       Plan       *innerplan = plan->righttree;
+       ElidedNode *elidedouter;
+       ElidedNode *elidedinner;
+       pgpa_join_strategy strategy;
+       bool            uniqueouter;
+       bool            uniqueinner;
+
+       elidedouter = pgpa_last_elided_node(pstmt, outerplan);
+       elidedinner = pgpa_last_elided_node(pstmt, innerplan);
+       *found_any_outer_gather = false;
+       *found_any_inner_gather = false;
+
+       switch (nodeTag(plan))
+       {
+               case T_MergeJoin:
+
+                       /*
+                        * The planner may have chosen to place a Material node on the
+                        * inner side of the MergeJoin; if this is present, we record it
+                        * as part of the join strategy.
+                        */
+                       if (elidedinner == NULL && IsA(innerplan, Material))
+                       {
+                               elidedinner = pgpa_descend_node(pstmt, &innerplan);
+                               strategy = JSTRAT_MERGE_JOIN_MATERIALIZE;
+                       }
+                       else
+                               strategy = JSTRAT_MERGE_JOIN_PLAIN;
+
+                       /*
+                        * For a MergeJoin, either the outer or the inner subplan, or
+                        * both, may have needed to be sorted; we must disregard any Sort
+                        * or IncrementalSort node to find the real inner or outer
+                        * subplan.
+                        */
+                       if (elidedouter == NULL && is_sorting_plan(outerplan))
+                               elidedouter = pgpa_descend_node(pstmt, &outerplan);
+                       if (elidedinner == NULL && is_sorting_plan(innerplan))
+                               elidedinner = pgpa_descend_node(pstmt, &innerplan);
+                       break;
+
+               case T_NestLoop:
+
+                       /*
+                        * The planner may have chosen to place a Material or Memoize node
+                        * on the inner side of the NestLoop; if this is present, we
+                        * record it as part of the join strategy.
+                        */
+                       if (elidedinner == NULL && IsA(innerplan, Material))
+                       {
+                               elidedinner = pgpa_descend_node(pstmt, &innerplan);
+                               strategy = JSTRAT_NESTED_LOOP_MATERIALIZE;
+                       }
+                       else if (elidedinner == NULL && IsA(innerplan, Memoize))
+                       {
+                               elidedinner = pgpa_descend_node(pstmt, &innerplan);
+                               strategy = JSTRAT_NESTED_LOOP_MEMOIZE;
+                       }
+                       else
+                               strategy = JSTRAT_NESTED_LOOP_PLAIN;
+                       break;
+
+               case T_HashJoin:
+
+                       /*
+                        * The inner subplan of a HashJoin is always a Hash node; the real
+                        * inner subplan is the Hash node's child.
+                        */
+                       Assert(IsA(innerplan, Hash));
+                       Assert(elidedinner == NULL);
+                       elidedinner = pgpa_descend_node(pstmt, &innerplan);
+                       strategy = JSTRAT_HASH_JOIN;
+                       break;
+
+               default:
+                       elog(ERROR, "unrecognized node type: %d", (int) nodeTag(plan));
+       }
+
+       /*
+        * The planner may have decided to implement a semijoin by first making
+        * the nullable side of the plan unique, and then performing a normal join
+        * against the result. Therefore, we might need to descend through a
+        * unique node on either side of the plan.
+        */
+       uniqueouter = pgpa_descend_any_unique(pstmt, &outerplan, &elidedouter);
+       uniqueinner = pgpa_descend_any_unique(pstmt, &innerplan, &elidedinner);
+
+       /*
+        * The planner may have decided to parallelize part of the join tree, so
+        * we could find a Gather or Gather Merge node here. Note that, if
+        * present, this will appear below nodes we considered as part of the join
+        * strategy, but we could find another uniqueness-enforcing node below the
+        * Gather or Gather Merge, if present.
+        *
+        * The found_any_*_gather arguments are pointers, so they must be
+        * dereferenced here; testing the pointer itself would always be true
+        * and would look for a second uniqueness-enforcing node even when no
+        * Gather or Gather Merge was traversed.
+        */
+       if (elidedouter == NULL)
+       {
+               elidedouter = pgpa_descend_any_gather(pstmt, &outerplan,
+                                                                                         found_any_outer_gather);
+               if (*found_any_outer_gather &&
+                       pgpa_descend_any_unique(pstmt, &outerplan, &elidedouter))
+                       uniqueouter = true;
+       }
+       if (elidedinner == NULL)
+       {
+               elidedinner = pgpa_descend_any_gather(pstmt, &innerplan,
+                                                                                         found_any_inner_gather);
+               if (*found_any_inner_gather &&
+                       pgpa_descend_any_unique(pstmt, &innerplan, &elidedinner))
+                       uniqueinner = true;
+       }
+
+       /*
+        * It's possible that a Result node has been inserted either to project a
+        * target list or to implement a one-time filter. If so, we can descend
+        * through it. Note that a result node without a child would be a
+        * degenerate scan or join, and not something we could descend through.
+        *
+        * XXX. I suspect it's possible for this to happen above the Gather or
+        * Gather Merge node, too, but apparently we have no test case for that
+        * scenario.
+        */
+       if (elidedouter == NULL && is_result_node_with_child(outerplan))
+               elidedouter = pgpa_descend_node(pstmt, &outerplan);
+       if (elidedinner == NULL && is_result_node_with_child(innerplan))
+               elidedinner = pgpa_descend_node(pstmt, &innerplan);
+
+       /*
+        * If this is a semijoin that was converted to an inner join by making one
+        * side or the other unique, make a note that the inner or outer subplan,
+        * as appropriate, should be treated as a query plan feature when the main
+        * tree traversal reaches it.
+        *
+        * Conversely, if the planner could have made one side of the join unique
+        * and thereby converted it to an inner join, and chose not to do so, that
+        * is also worth noting.
+        *
+        * XXX: We admit too much non-unique advice, as in the following example
+        * from the regression tests: EXPLAIN (PLAN_ADVICE, COSTS OFF) DELETE FROM
+        * prt1_l WHERE EXISTS (SELECT 1 FROM int4_tbl, LATERAL (SELECT
+        * int4_tbl.f1 FROM int8_tbl LIMIT 2) ss WHERE prt1_l.c IS NULL). We emit
+        * SEMIJOIN_NON_UNIQUE((int4_tbl ss)) but create_unique_path() fails in
+        * this case, so there's no sj-unique version possible.
+        *
+        * NB: This code could appear slightly higher up in this function, but
+        * none of the nodes through which we just descended should have
+        * associated RTIs.
+        *
+        * NB: This seems like a somewhat hacky way of passing information up to
+        * the main tree walk, but I don't currently have a better idea.
+        */
+       if (uniqueouter)
+               pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_UNIQUE, outerplan);
+       else if (jointype == JOIN_RIGHT_SEMI)
+               pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_NON_UNIQUE, outerplan);
+       if (uniqueinner)
+               pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_UNIQUE, innerplan);
+       else if (jointype == JOIN_SEMI)
+               pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_NON_UNIQUE, innerplan);
+
+       /* Set output parameters. */
+       *realouter = outerplan;
+       *realinner = innerplan;
+       *elidedrealouter = elidedouter;
+       *elidedrealinner = elidedinner;
+       return strategy;
+}
+
+/*
+ * Step from *plan down to its left (outer) child, for use when the caller
+ * has decided that the current node of a join tree is of no interest.
+ *
+ * *plan is replaced by the child, and the return value is the last of any
+ * elided nodes associated with that child.
+ */
+static ElidedNode *
+pgpa_descend_node(PlannedStmt *pstmt, Plan **plan)
+{
+       Plan       *child = (*plan)->lefttree;
+
+       *plan = child;
+
+       return pgpa_last_elided_node(pstmt, child);
+}
+
+/*
+ * Look through a Gather or Gather Merge node at *plan, if there is one. For
+ * Gather Merge, also look through a Sort or IncrementalSort directly beneath
+ * it, provided no elided node is associated with that sorting node.
+ *
+ * The caller is expected to have checked that no ElidedNode pertains to the
+ * incoming value of *plan.
+ *
+ * *plan is advanced past whatever was looked through, and the last of any
+ * elided nodes for the resulting plan node is returned. When *plan is neither
+ * Gather nor Gather Merge, nothing is changed and NULL is returned.
+ * *found_any_gather is set to true if a Gather or Gather Merge was found and
+ * is otherwise left alone.
+ */
+static ElidedNode *
+pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan,
+                                               bool *found_any_gather)
+{
+       bool            is_gather_merge = IsA(*plan, GatherMerge);
+       ElidedNode *result;
+
+       /* Nothing to do unless we're looking at some kind of Gather. */
+       if (!is_gather_merge && !IsA(*plan, Gather))
+               return NULL;
+
+       *found_any_gather = true;
+       result = pgpa_descend_node(pstmt, plan);
+
+       /* Only Gather Merge can have a sorting node that belongs to it. */
+       if (is_gather_merge && result == NULL && is_sorting_plan(*plan))
+               result = pgpa_descend_node(pstmt, plan);
+
+       return result;
+}
+
+/*
+ * Look through an Agg or Unique node at *plan, and then through a Sort or
+ * IncrementalSort immediately below it, stopping as soon as a node with a
+ * corresponding elided node is reached.
+ *
+ * On entry, *elided_node must be the last of any elided nodes corresponding
+ * to *plan; the same holds on exit, although *plan may have moved downward.
+ *
+ * Elided nodes act as barriers because a single join tree can't cross
+ * through any sort of elided node: subqueries are planned separately, and
+ * planning inside an Append or MergeAppend is separate from planning outside
+ * of it.
+ *
+ * Returns true if at least one node was looked through, and false otherwise.
+ */
+static bool
+pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan,
+                                               ElidedNode **elided_node)
+{
+       ElidedNode *below;
+
+       /* Stop at an elided node, or at anything that isn't Agg or Unique. */
+       if (*elided_node != NULL ||
+               (!IsA(*plan, Agg) && !IsA(*plan, Unique)))
+               return false;
+
+       below = pgpa_descend_node(pstmt, plan);
+       if (below == NULL && is_sorting_plan(*plan))
+               below = pgpa_descend_node(pstmt, plan);
+       *elided_node = below;
+
+       return true;
+}
+
+/*
+ * Test for a Result node that sits on top of a child plan.
+ */
+static bool
+is_result_node_with_child(Plan *plan)
+{
+       if (!IsA(plan, Result))
+               return false;
+
+       return plan->lefttree != NULL;
+}
+
+/*
+ * Test for a Plan node whose purpose is to put the data in a certain order,
+ * namely Sort or IncrementalSort.
+ */
+static bool
+is_sorting_plan(Plan *plan)
+{
+       switch (nodeTag(plan))
+       {
+               case T_Sort:
+               case T_IncrementalSort:
+                       return true;
+               default:
+                       return false;
+       }
+}
diff --git a/contrib/pg_plan_advice/pgpa_join.h b/contrib/pg_plan_advice/pgpa_join.h

new file mode 100644 (file)

index 0000000..4dc7298
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_join.h
@@ -0,0 +1,105 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_join.h
+ *       analysis of joins in Plan trees
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_join.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_JOIN_H
+#define PGPA_JOIN_H
+
+#include "nodes/plannodes.h"
+
+typedef struct pgpa_plan_walker_context pgpa_plan_walker_context;
+typedef struct pgpa_join_unroller pgpa_join_unroller;
+typedef struct pgpa_unrolled_join pgpa_unrolled_join;
+
+/*
+ * Although there are three main join strategies, we try to classify things
+ * more precisely here: merge joins have the option of using materialization
+ * on the inner side, and nested loops can use either materialization or
+ * memoization.
+ */
+typedef enum
+{
+       /* Merge Join with no Material node directly on the inner side */
+       JSTRAT_MERGE_JOIN_PLAIN = 0,
+       /* Merge Join with a Material node directly on the inner side */
+       JSTRAT_MERGE_JOIN_MATERIALIZE,
+       /* Nested Loop with neither Material nor Memoize on the inner side */
+       JSTRAT_NESTED_LOOP_PLAIN,
+       /* Nested Loop with a Material node directly on the inner side */
+       JSTRAT_NESTED_LOOP_MATERIALIZE,
+       /* Nested Loop with a Memoize node directly on the inner side */
+       JSTRAT_NESTED_LOOP_MEMOIZE,
+       /* Hash Join; the inner side always goes through a Hash node */
+       JSTRAT_HASH_JOIN
+       /* update NUM_PGPA_JOIN_STRATEGY if you add anything here */
+} pgpa_join_strategy;
+
+#define NUM_PGPA_JOIN_STRATEGY         ((int) JSTRAT_HASH_JOIN + 1)
+
+/*
+ * In an outer-deep join tree, every member of an unrolled join will be a scan,
+ * but join trees with other shapes can contain unrolled joins.
+ *
+ * The plan node we store here will be the inner or outer child of the join
+ * node, as appropriate, except that we look through subnodes that we regard as
+ * part of the join method itself. For instance, for a Nested Loop that
+ * materializes the inner input, we'll store the child of the Materialize node,
+ * not the Materialize node itself.
+ *
+ * If setrefs processing elided one or more nodes from the plan tree, then
+ * we'll store details about the topmost of those in elided_node; otherwise,
+ * it will be NULL.
+ *
+ * Exactly one of scan and unrolled_join will be non-NULL.
+ */
+typedef struct
+{
+       /* Real input plan, after looking through join-strategy subnodes. */
+       Plan       *plan;
+       /* Topmost node elided by setrefs processing for plan, or NULL. */
+       ElidedNode *elided_node;
+       /* Set when this member is a scan; NULL when unrolled_join is set. */
+       struct pgpa_scan *scan;
+       /* Set when this member is itself a join; NULL when scan is set. */
+       pgpa_unrolled_join *unrolled_join;
+} pgpa_join_member;
+
+/*
+ * We convert outer-deep join trees to a flat structure; that is, ((A JOIN B)
+ * JOIN C) JOIN D gets converted to outer = A, inner = <B C D>.  When joins
+ * aren't outer-deep, substructure is required, e.g. (A JOIN B) JOIN (C JOIN D)
+ * is represented as outer = A, inner = <B X>, where X is a pgpa_unrolled_join
+ * covering C-D.
+ */
+struct pgpa_unrolled_join
+{
+       /* Outermost member; must not itself be an unrolled join. */
+       pgpa_join_member outer;
+
+       /* Number of inner members. Length of the strategy and inner arrays. */
+       unsigned        ninner;
+
+       /*
+        * Array of strategies, one per non-outermost member; strategy[i] is the
+        * strategy of the join whose inner input is inner[i].
+        */
+       pgpa_join_strategy *strategy;
+
+       /* Array of members, excluding the outermost. Deepest first. */
+       pgpa_join_member *inner;
+};
+
+/*
+ * Test for one of the plan node types that inherit from Join: NestLoop,
+ * MergeJoin, or HashJoin.
+ */
+static inline bool
+pgpa_is_join(Plan *plan)
+{
+       switch (nodeTag(plan))
+       {
+               case T_NestLoop:
+               case T_MergeJoin:
+               case T_HashJoin:
+                       return true;
+               default:
+                       return false;
+       }
+}
+
+extern pgpa_join_unroller *pgpa_create_join_unroller(void);
+extern void pgpa_unroll_join(pgpa_plan_walker_context *walker,
+                                                        Plan *plan, bool beneath_any_gather,
+                                                        pgpa_join_unroller *join_unroller,
+                                                        pgpa_join_unroller **outer_join_unroller,
+                                                        pgpa_join_unroller **inner_join_unroller);
+extern pgpa_unrolled_join *pgpa_build_unrolled_join(pgpa_plan_walker_context *walker,
+                                                                                                       pgpa_join_unroller *join_unroller);
+extern void pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_output.c b/contrib/pg_plan_advice/pgpa_output.c

new file mode 100644 (file)

index 0000000..89a675f
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_output.c
@@ -0,0 +1,628 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_output.c
+ *       produce textual output from the results of a plan tree walk
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_output.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pgpa_output.h"
+#include "pgpa_scan.h"
+
+#include "nodes/parsenodes.h"
+#include "parser/parsetree.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+
+/*
+ * Context object for textual advice generation.
+ *
+ * rid_strings is an array of range table identifiers in string form, built
+ * by pgpa_output_advice() from the caller-provided rt_identifiers array. It
+ * is indexed by RTI - 1, and an entry is NULL when the corresponding RTI has
+ * no identifier. See the comments at the top of pgpa_identifier.c for more
+ * details.
+ *
+ * buf is the caller-provided output buffer.
+ *
+ * wrap_column is the wrap column, so that we don't create output that is
+ * too wide. See pgpa_maybe_linebreak() and comments in pgpa_output_advice.
+ */
+typedef struct pgpa_output_context
+{
+       const char **rid_strings;
+       StringInfo      buf;
+       int                     wrap_column;
+} pgpa_output_context;
+
+/*
+ * Forward declarations for the file-local helpers defined later in this
+ * file: the per-item output routines, the enum-to-string converters, and
+ * the line wrapping helper.
+ */
+static void pgpa_output_unrolled_join(pgpa_output_context *context,
+                                                                         pgpa_unrolled_join *join);
+static void pgpa_output_join_member(pgpa_output_context *context,
+                                                                       pgpa_join_member *member);
+static void pgpa_output_scan_strategy(pgpa_output_context *context,
+                                                                         pgpa_scan_strategy strategy,
+                                                                         List *scans);
+static void pgpa_output_bitmap_index_details(pgpa_output_context *context,
+                                                                                        Plan *plan);
+static void pgpa_output_relation_name(pgpa_output_context *context, Oid relid);
+static void pgpa_output_query_feature(pgpa_output_context *context,
+                                                                         pgpa_qf_type type,
+                                                                         List *query_features);
+static void pgpa_output_simple_strategy(pgpa_output_context *context,
+                                                                               char *strategy,
+                                                                               List *relid_sets);
+static void pgpa_output_no_gather(pgpa_output_context *context,
+                                                                 Bitmapset *relids);
+static void pgpa_output_relations(pgpa_output_context *context, StringInfo buf,
+                                                                 Bitmapset *relids);
+
+static char *pgpa_cstring_join_strategy(pgpa_join_strategy strategy);
+static char *pgpa_cstring_scan_strategy(pgpa_scan_strategy strategy);
+static char *pgpa_cstring_query_feature_type(pgpa_qf_type type);
+
+static void pgpa_maybe_linebreak(StringInfo buf, int wrap_column);
+
+/*
+ * Render the advice gathered by a plan tree walk as text.
+ *
+ * 'walker' must already have been used to walk the main plan tree and all
+ * subplans of the PlannedStmt; this function only reads the lists and sets
+ * that the walk accumulated.
+ *
+ * 'rt_identifiers' has one entry per range table entry; see
+ * pgpa_create_identifiers_for_planned_stmt().
+ *
+ * The text is appended to 'buf'. Every advice item begins on a new line
+ * unless the buffer is still empty at that point.
+ */
+void
+pgpa_output_advice(StringInfo buf, pgpa_plan_walker_context *walker,
+                                  pgpa_identifier *rt_identifiers)
+{
+       pgpa_output_context context;
+       Index           nrte = list_length(walker->pstmt->rtable);
+
+       memset(&context, 0, sizeof(pgpa_output_context));
+       context.buf = buf;
+
+       /*
+        * Wrap at column 76. EXPLAIN (PLAN_ADVICE) output viewed through psql
+        * with default settings is indented by three columns in total (one from
+        * psql, two from EXPLAIN), so in an 80-column window anything longer
+        * than 77 characters would wrap; 76 additionally keeps the text out of
+        * the terminal's very last column. This is a fairly arbitrary set of
+        * assumptions, and a different or configurable limit would be
+        * defensible.
+        */
+       context.wrap_column = 76;
+
+       /*
+        * Build the string form of each identifier. Array positions are RTI - 1,
+        * because RTIs are 1-based. Entries stay NULL for RTIs that have no
+        * identifier, which is the case for join RTEs (RTE_JOIN) and for
+        * portions of the query that didn't make it into the final plan.
+        */
+       context.rid_strings = palloc0_array(const char *, nrte);
+       for (int slot = 0; slot < nrte; ++slot)
+       {
+               pgpa_identifier *rid = &rt_identifiers[slot];
+
+               if (rid->alias_name == NULL)
+                       continue;
+               context.rid_strings[slot] = pgpa_identifier_string(rid);
+       }
+
+       /*
+        * Emit one JOIN_ORDER() item per top-level unrolled join. Unlike, say,
+        * SEQ_SCAN(a b c d), which is equivalent to SEQ_SCAN(a b) SEQ_SCAN(c d),
+        * these items must never be merged: JOIN_ORDER(a b c d) makes "a" the
+        * driving table and then joins "b", "c", and "d" to it in that order,
+        * which is totally different from JOIN_ORDER(a b) plus JOIN_ORDER(c d).
+        */
+       foreach_ptr(pgpa_unrolled_join, ujoin, walker->toplevel_unrolled_joins)
+       {
+               if (buf->len > 0)
+                       appendStringInfoChar(buf, '\n');
+               appendStringInfoString(buf, "JOIN_ORDER(");
+               pgpa_output_unrolled_join(&context, ujoin);
+               appendStringInfoChar(buf, ')');
+               pgpa_maybe_linebreak(buf, context.wrap_column);
+       }
+
+       /* Join strategy advice, one item per strategy that was used. */
+       for (int js = 0; js < NUM_PGPA_JOIN_STRATEGY; ++js)
+               pgpa_output_simple_strategy(&context,
+                                                                       pgpa_cstring_join_strategy(js),
+                                                                       walker->join_strategies[js]);
+
+       /*
+        * Scan strategy advice. Ordinary scans are skipped, since they are
+        * definitionally uninteresting.
+        */
+       for (int ss = 0; ss < NUM_PGPA_SCAN_STRATEGY; ++ss)
+       {
+               if (ss == PGPA_SCAN_ORDINARY)
+                       continue;
+               pgpa_output_scan_strategy(&context, ss, walker->scans[ss]);
+       }
+
+       /* Query feature advice. */
+       for (int qf = 0; qf < NUM_PGPA_QF_TYPES; ++qf)
+               pgpa_output_query_feature(&context, qf, walker->query_features[qf]);
+
+       /* NO_GATHER advice comes last. */
+       pgpa_output_no_gather(&context, walker->no_gather_scans);
+}
+
+/*
+ * Emit the contents of one unrolled join as part of JOIN_ORDER() advice.
+ *
+ * The outermost member is written first. Each inner member follows in array
+ * order, preceded by a possible line break and a single space.
+ */
+static void
+pgpa_output_unrolled_join(pgpa_output_context *context,
+                                                 pgpa_unrolled_join *join)
+{
+       StringInfo      out = context->buf;
+
+       pgpa_output_join_member(context, &join->outer);
+
+       for (unsigned pos = 0; pos < join->ninner; pos++)
+       {
+               pgpa_maybe_linebreak(out, context->wrap_column);
+               appendStringInfoChar(out, ' ');
+               pgpa_output_join_member(context, &join->inner[pos]);
+       }
+}
+
+/*
+ * Emit one member of an unrolled join as part of JOIN_ORDER() advice.
+ *
+ * A member that is itself an unrolled join is written recursively inside
+ * parentheses. Otherwise the member is a scan: a scan of exactly one
+ * relation is written as a bare identifier, and any other scan has its
+ * identifiers wrapped in curly braces.
+ */
+static void
+pgpa_output_join_member(pgpa_output_context *context,
+                                               pgpa_join_member *member)
+{
+       StringInfo      out = context->buf;
+       pgpa_scan  *scan;
+       bool            braces;
+
+       if (member->unrolled_join != NULL)
+       {
+               appendStringInfoChar(out, '(');
+               pgpa_output_unrolled_join(context, member->unrolled_join);
+               appendStringInfoChar(out, ')');
+               return;
+       }
+
+       scan = member->scan;
+       Assert(scan != NULL);
+
+       braces = (bms_membership(scan->relids) != BMS_SINGLETON);
+       if (braces)
+               appendStringInfoChar(out, '{');
+       pgpa_output_relations(context, out, scan->relids);
+       if (braces)
+               appendStringInfoChar(out, '}');
+}
+
+/*
+ * Emit a single advice item for a List of pgpa_scan objects.
+ *
+ * Every scan in the list must use the strategy given by "strategy", which
+ * also supplies the tag name. An empty list produces no output at all.
+ *
+ * Each scan contributes its relation identifier(s), parenthesized unless the
+ * scan covers exactly one relation. For index scans and index-only scans the
+ * schema-qualified index name follows; for bitmap heap scans a description
+ * of the underlying bitmap index tree follows.
+ */
+static void
+pgpa_output_scan_strategy(pgpa_output_context *context,
+                                                 pgpa_scan_strategy strategy,
+                                                 List *scans)
+{
+       StringInfo      out = context->buf;
+       bool            need_sep = false;
+
+       if (scans == NIL)
+               return;
+
+       if (out->len > 0)
+               appendStringInfoChar(out, '\n');
+       appendStringInfo(out, "%s(", pgpa_cstring_scan_strategy(strategy));
+
+       foreach_ptr(pgpa_scan, scan, scans)
+       {
+               Plan       *plan = scan->plan;
+               bool            grouped;
+
+               /* Separate this target from the previous one. */
+               if (need_sep)
+               {
+                       pgpa_maybe_linebreak(out, context->wrap_column);
+                       appendStringInfoChar(out, ' ');
+               }
+               need_sep = true;
+
+               /* Relation identifiers, grouped unless there is exactly one. */
+               grouped = (bms_membership(scan->relids) != BMS_SINGLETON);
+               if (grouped)
+                       appendStringInfoChar(out, '(');
+               pgpa_output_relations(context, out, scan->relids);
+               if (grouped)
+                       appendStringInfoChar(out, ')');
+
+               /* Strategies that involve indexes also name the index(es). */
+               switch (strategy)
+               {
+                       case PGPA_SCAN_INDEX:
+                               Assert(IsA(plan, IndexScan));
+                               pgpa_maybe_linebreak(out, context->wrap_column);
+                               appendStringInfoChar(out, ' ');
+                               pgpa_output_relation_name(context,
+                                                                                 ((IndexScan *) plan)->indexid);
+                               break;
+
+                       case PGPA_SCAN_INDEX_ONLY:
+                               Assert(IsA(plan, IndexOnlyScan));
+                               pgpa_maybe_linebreak(out, context->wrap_column);
+                               appendStringInfoChar(out, ' ');
+                               pgpa_output_relation_name(context,
+                                                                                 ((IndexOnlyScan *) plan)->indexid);
+                               break;
+
+                       case PGPA_SCAN_BITMAP_HEAP:
+                               pgpa_maybe_linebreak(out, context->wrap_column);
+                               appendStringInfoChar(out, ' ');
+                               pgpa_output_bitmap_index_details(context, plan->lefttree);
+                               break;
+
+                       default:
+                               /* No additional details for other strategies. */
+                               break;
+               }
+       }
+
+       appendStringInfoChar(out, ')');
+       pgpa_maybe_linebreak(out, context->wrap_column);
+}
+
+/*
+ * Describe the index or indexes that feed a BitmapHeapScan.
+ *
+ * A BitmapIndexScan is written as its schema-qualified index name. A
+ * BitmapAnd over i1, i2, and i3 is written as &&(i1 i2 i3), and a BitmapOr
+ * likewise as ||(i1 i2 i3); children are handled recursively, so these
+ * forms can nest. Any other node type raises an error.
+ */
+static void
+pgpa_output_bitmap_index_details(pgpa_output_context *context, Plan *plan)
+{
+       StringInfo      out = context->buf;
+       char       *opname;
+       List       *children;
+       bool            need_sep = false;
+
+       switch (nodeTag(plan))
+       {
+               case T_BitmapIndexScan:
+                       /* Leaf: just the index name, with no operator wrapper. */
+                       pgpa_output_relation_name(context,
+                                                                         ((BitmapIndexScan *) plan)->indexid);
+                       return;
+
+               case T_BitmapOr:
+                       opname = "||";
+                       children = ((BitmapOr *) plan)->bitmapplans;
+                       break;
+
+               case T_BitmapAnd:
+                       opname = "&&";
+                       children = ((BitmapAnd *) plan)->bitmapplans;
+                       break;
+
+               default:
+                       elog(ERROR, "unexpected node type: %d", (int) nodeTag(plan));
+       }
+
+       appendStringInfo(out, "%s(", opname);
+       foreach_ptr(Plan, child_plan, children)
+       {
+               if (need_sep)
+               {
+                       pgpa_maybe_linebreak(out, context->wrap_column);
+                       appendStringInfoChar(out, ' ');
+               }
+               need_sep = true;
+
+               pgpa_output_bitmap_index_details(context, child_plan);
+       }
+       appendStringInfoChar(out, ')');
+}
+
+/*
+ * Output a schema-qualified relation name.
+ *
+ * Both the schema name and the relation name are passed through
+ * quote_identifier() before being appended to the output buffer, separated
+ * by a dot.
+ *
+ * The syscache lookups return NULL when the relation or its namespace cannot
+ * be found. That should not happen for a relation referenced by a plan we
+ * are examining, but quote_identifier() would dereference the NULL pointer,
+ * so report a clean error instead of crashing.
+ */
+static void
+pgpa_output_relation_name(pgpa_output_context *context, Oid relid)
+{
+       Oid                     nspoid;
+       char       *relnamespace;
+       char       *relname;
+
+       relname = get_rel_name(relid);
+       if (relname == NULL)
+               elog(ERROR, "cache lookup failed for relation %u", relid);
+
+       nspoid = get_rel_namespace(relid);
+       relnamespace = get_namespace_name_or_temp(nspoid);
+       if (relnamespace == NULL)
+               elog(ERROR, "cache lookup failed for namespace %u", nspoid);
+
+       appendStringInfoString(context->buf, quote_identifier(relnamespace));
+       appendStringInfoChar(context->buf, '.');
+       appendStringInfoString(context->buf, quote_identifier(relname));
+}
+
+/*
+ * Emit a single advice item for a List of pgpa_query_feature objects.
+ *
+ * All features must be of the type given by the "type" argument, which
+ * supplies the tag name. An empty list produces no output at all. Each
+ * feature contributes its relation identifier(s), parenthesized unless the
+ * feature covers exactly one relation.
+ */
+static void
+pgpa_output_query_feature(pgpa_output_context *context, pgpa_qf_type type,
+                                                 List *query_features)
+{
+       StringInfo      out = context->buf;
+       bool            need_sep = false;
+
+       if (query_features == NIL)
+               return;
+
+       if (out->len > 0)
+               appendStringInfoChar(out, '\n');
+       appendStringInfo(out, "%s(", pgpa_cstring_query_feature_type(type));
+
+       foreach_ptr(pgpa_query_feature, qf, query_features)
+       {
+               bool            grouped = (bms_membership(qf->relids) != BMS_SINGLETON);
+
+               /* Separate this target from the previous one. */
+               if (need_sep)
+               {
+                       pgpa_maybe_linebreak(out, context->wrap_column);
+                       appendStringInfoChar(out, ' ');
+               }
+               need_sep = true;
+
+               if (grouped)
+                       appendStringInfoChar(out, '(');
+               pgpa_output_relations(context, out, qf->relids);
+               if (grouped)
+                       appendStringInfoChar(out, ')');
+       }
+
+       appendStringInfoChar(out, ')');
+       pgpa_maybe_linebreak(out, context->wrap_column);
+}
+
+/*
+ * Emit "simple" advice for a List of Bitmapsets, each holding one or more
+ * RTIs.
+ *
+ * Simple advice has the most straightforward shape: the strategy name,
+ * then a space-separated list of items enclosed in parentheses. A Bitmapset
+ * with exactly one member becomes a bare relation identifier; one with
+ * several members becomes a parenthesized, space-separated sub-list. An
+ * empty Bitmapset probably shouldn't show up here, but if one does it is
+ * rendered as an empty sub-list.
+ *
+ * An empty List produces no output at all.
+ */
+static void
+pgpa_output_simple_strategy(pgpa_output_context *context, char *strategy,
+                                                       List *relid_sets)
+{
+       StringInfo      out = context->buf;
+       bool            need_sep = false;
+
+       if (relid_sets == NIL)
+               return;
+
+       if (out->len > 0)
+               appendStringInfoChar(out, '\n');
+       appendStringInfo(out, "%s(", strategy);
+
+       foreach_node(Bitmapset, relids, relid_sets)
+       {
+               bool            grouped = (bms_membership(relids) != BMS_SINGLETON);
+
+               /* Separate this item from the previous one. */
+               if (need_sep)
+               {
+                       pgpa_maybe_linebreak(out, context->wrap_column);
+                       appendStringInfoChar(out, ' ');
+               }
+               need_sep = true;
+
+               if (grouped)
+                       appendStringInfoChar(out, '(');
+               pgpa_output_relations(context, out, relids);
+               if (grouped)
+                       appendStringInfoChar(out, ')');
+       }
+
+       appendStringInfoChar(out, ')');
+       pgpa_maybe_linebreak(out, context->wrap_column);
+}
+
+/*
+ * Emit NO_GATHER advice naming every relation in 'relids', which are the
+ * relations that do not appear beneath any Gather or Gather Merge node.
+ *
+ * Nothing is emitted when the set is NULL.
+ */
+static void
+pgpa_output_no_gather(pgpa_output_context *context, Bitmapset *relids)
+{
+       StringInfo      out = context->buf;
+
+       if (relids != NULL)
+       {
+               if (out->len > 0)
+                       appendStringInfoChar(out, '\n');
+               appendStringInfoString(out, "NO_GATHER(");
+               pgpa_output_relations(context, out, relids);
+               appendStringInfoChar(out, ')');
+       }
+}
+
+/*
+ * Append to 'buf' the identifier of every RTI in the provided set.
+ *
+ * Identifiers are separated by single spaces, and a line break may be
+ * inserted before each separator. An RTI whose identifier string is missing
+ * raises an error.
+ */
+static void
+pgpa_output_relations(pgpa_output_context *context, StringInfo buf,
+                                         Bitmapset *relids)
+{
+       bool            need_sep = false;
+
+       for (int member = bms_next_member(relids, -1);
+                member >= 0;
+                member = bms_next_member(relids, member))
+       {
+               /* rid_strings is 0-based, whereas RTIs are 1-based. */
+               const char *name = context->rid_strings[member - 1];
+
+               if (name == NULL)
+                       elog(ERROR, "no identifier for RTI %d", member);
+
+               if (need_sep)
+               {
+                       pgpa_maybe_linebreak(buf, context->wrap_column);
+                       appendStringInfoChar(buf, ' ');
+               }
+               appendStringInfoString(buf, name);
+               need_sep = true;
+       }
+}
+
+/*
+ * Map a join strategy to the advice tag name used for it in the output.
+ *
+ * The returned pointer refers to a string literal. Callers must pass one of
+ * the strategies handled below; any other value is treated as unreachable.
+ */
+static char *
+pgpa_cstring_join_strategy(pgpa_join_strategy strategy)
+{
+       char       *name = NULL;
+
+       switch (strategy)
+       {
+               case JSTRAT_MERGE_JOIN_PLAIN:
+                       name = "MERGE_JOIN_PLAIN";
+                       break;
+               case JSTRAT_MERGE_JOIN_MATERIALIZE:
+                       name = "MERGE_JOIN_MATERIALIZE";
+                       break;
+               case JSTRAT_NESTED_LOOP_PLAIN:
+                       name = "NESTED_LOOP_PLAIN";
+                       break;
+               case JSTRAT_NESTED_LOOP_MATERIALIZE:
+                       name = "NESTED_LOOP_MATERIALIZE";
+                       break;
+               case JSTRAT_NESTED_LOOP_MEMOIZE:
+                       name = "NESTED_LOOP_MEMOIZE";
+                       break;
+               case JSTRAT_HASH_JOIN:
+                       name = "HASH_JOIN";
+                       break;
+       }
+
+       if (name == NULL)
+               pg_unreachable();
+       return name;
+}
+
+/*
+ * Map a scan strategy to the advice tag name used for it in the output.
+ *
+ * The returned pointer refers to a string literal. Callers must pass one of
+ * the strategies handled below; any other value is treated as unreachable.
+ *
+ * Note that the tag for PGPA_SCAN_FOREIGN is spelled FOREIGN_JOIN, matching
+ * the PGPA_TAG_FOREIGN_JOIN tag that the advice parser recognizes.
+ */
+static char *
+pgpa_cstring_scan_strategy(pgpa_scan_strategy strategy)
+{
+       char       *name = NULL;
+
+       switch (strategy)
+       {
+               case PGPA_SCAN_ORDINARY:
+                       name = "ORDINARY_SCAN";
+                       break;
+               case PGPA_SCAN_SEQ:
+                       name = "SEQ_SCAN";
+                       break;
+               case PGPA_SCAN_BITMAP_HEAP:
+                       name = "BITMAP_HEAP_SCAN";
+                       break;
+               case PGPA_SCAN_FOREIGN:
+                       name = "FOREIGN_JOIN";
+                       break;
+               case PGPA_SCAN_INDEX:
+                       name = "INDEX_SCAN";
+                       break;
+               case PGPA_SCAN_INDEX_ONLY:
+                       name = "INDEX_ONLY_SCAN";
+                       break;
+               case PGPA_SCAN_PARTITIONWISE:
+                       name = "PARTITIONWISE";
+                       break;
+               case PGPA_SCAN_TID:
+                       name = "TID_SCAN";
+                       break;
+       }
+
+       if (name == NULL)
+               pg_unreachable();
+       return name;
+}
+
+/*
+ * Map a query feature type to the advice tag name used for it in the
+ * output.
+ *
+ * The returned pointer refers to a string literal. Callers must pass one of
+ * the types handled below; any other value is treated as unreachable.
+ */
+static char *
+pgpa_cstring_query_feature_type(pgpa_qf_type type)
+{
+       char       *name = NULL;
+
+       switch (type)
+       {
+               case PGPAQF_GATHER:
+                       name = "GATHER";
+                       break;
+               case PGPAQF_GATHER_MERGE:
+                       name = "GATHER_MERGE";
+                       break;
+               case PGPAQF_SEMIJOIN_NON_UNIQUE:
+                       name = "SEMIJOIN_NON_UNIQUE";
+                       break;
+               case PGPAQF_SEMIJOIN_UNIQUE:
+                       name = "SEMIJOIN_UNIQUE";
+                       break;
+       }
+
+       if (name == NULL)
+               pg_unreachable();
+       return name;
+}
+
+/*
+ * Insert a line break into the StringInfoData, if needed.
+ *
+ * If wrap_column is zero or negative, this does nothing. Otherwise, we
+ * consider inserting a newline. We only insert a newline if the length of
+ * the last line in the buffer exceeds wrap_column, and not if we'd be
+ * inserting a newline at or before the beginning of the current line.
+ *
+ * The position at which the newline is inserted is simply wherever the
+ * buffer ended the last time this function was called; that position is
+ * remembered in buf->cursor between calls. In other words, the caller is
+ * expected to call this function every time we reach a good place for a
+ * line break.
+ */
+static void
+pgpa_maybe_linebreak(StringInfo buf, int wrap_column)
+{
+       char       *trailing_nl;
+       int                     line_start;
+       int                     save_cursor;
+
+       /* If line wrapping is disabled, exit quickly. */
+       if (wrap_column <= 0)
+               return;
+
+       /*
+        * Set line_start to the byte offset within buf->data of the first
+        * character of the current line, where the current line means the last
+        * one in the buffer. Note that line_start could be the offset of the
+        * trailing '\0' if the last character in the buffer is a line break.
+        */
+       trailing_nl = strrchr(buf->data, '\n');
+       if (trailing_nl == NULL)
+               line_start = 0;
+       else
+               line_start = (trailing_nl - buf->data) + 1;
+
+       /*
+        * Remember that the current end of the buffer is a potential location to
+        * insert a line break on a future call to this function. The previous
+        * candidate location is kept in save_cursor for use below.
+        */
+       save_cursor = buf->cursor;
+       buf->cursor = buf->len;
+
+       /* If we haven't passed the wrap column, we don't need a newline. */
+       if (buf->len - line_start <= wrap_column)
+               return;
+
+       /*
+        * It only makes sense to insert a newline at a position later than the
+        * beginning of the current line. This must test the saved candidate
+        * position, not buf->cursor, which by now holds the end of the buffer;
+        * otherwise we could emit a newline at the very start of the buffer or
+        * right next to an existing newline, producing an empty line.
+        */
+       if (save_cursor <= line_start)
+               return;
+
+       /*
+        * Insert a newline at the previous cursor location, shifting the rest of
+        * the buffer one byte to the right and restoring the terminating '\0'.
+        * The cursor moves along with the data so that it still marks the end of
+        * the buffer.
+        */
+       enlargeStringInfo(buf, 1);
+       memmove(&buf->data[save_cursor] + 1, &buf->data[save_cursor],
+                       buf->len - save_cursor);
+       ++buf->cursor;
+       buf->data[++buf->len] = '\0';
+       buf->data[save_cursor] = '\n';
+}
diff --git a/contrib/pg_plan_advice/pgpa_output.h b/contrib/pg_plan_advice/pgpa_output.h

new file mode 100644 (file)

index 0000000..47496d7
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_output.h
@@ -0,0 +1,22 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_output.h
+ *       produce textual output from the results of a plan tree walk
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_output.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_OUTPUT_H
+#define PGPA_OUTPUT_H
+
+#include "pgpa_identifier.h"
+#include "pgpa_walker.h"
+
+/*
+ * Append textual plan advice to 'buf', using the results accumulated in
+ * 'walker' and the per-RTI identifiers in 'rt_identifiers'. Defined in
+ * pgpa_output.c.
+ */
+extern void pgpa_output_advice(StringInfo buf,
+                                                          pgpa_plan_walker_context *walker,
+                                                          pgpa_identifier *rt_identifiers);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_parser.y b/contrib/pg_plan_advice/pgpa_parser.y

new file mode 100644 (file)

index 0000000..4617e7f
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_parser.y
@@ -0,0 +1,337 @@
+%{
+/*
+ * Parser for plan advice
+ *
+ * Copyright (c) 2000-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_parser.y
+ */
+
+#include "postgres.h"
+
+#include <float.h>
+#include <math.h>
+
+#include "fmgr.h"
+#include "nodes/miscnodes.h"
+#include "utils/builtins.h"
+#include "utils/float.h"
+
+#include "pgpa_ast.h"
+#include "pgpa_parser.h"
+
+/*
+ * Bison doesn't allocate anything that needs to live across parser calls,
+ * so we can easily have it use palloc instead of malloc.  This prevents
+ * memory leaks if we error out during parsing.
+ */
+#define YYMALLOC palloc
+#define YYFREE   pfree
+%}
+
+/* BISON Declarations */
+
+/* The parser and the lexer are passed the same three extra arguments. */
+%parse-param {List **result}
+%parse-param {char **parse_error_msg_p}
+%parse-param {yyscan_t yyscanner}
+%lex-param {List **result}
+%lex-param {char **parse_error_msg_p}
+%lex-param {yyscan_t yyscanner}
+%pure-parser
+/* The grammar is expected to contain no conflicts. */
+%expect 0
+%name-prefix="pgpa_yy"
+
+/* Semantic value types used by the %token and %type declarations below. */
+%union
+{
+       /* identifiers and advice tag names */
+       char       *str;
+       /* integer literals, i.e. occurrence numbers */
+       int                     integer;
+       /* lists of advice items, targets, or name components */
+       List       *list;
+       /* one complete advice item */
+       pgpa_advice_item *item;
+       /* one advice target: a relation identifier or a sub-list */
+       pgpa_advice_target *target;
+       /* one index target: an index name or an AND/OR combination */
+       pgpa_index_target *itarget;
+}
+%token <str> TOK_IDENT TOK_TAG_JOIN_ORDER TOK_TAG_BITMAP TOK_TAG_INDEX
+%token <str> TOK_TAG_SIMPLE TOK_TAG_GENERIC
+%token <integer> TOK_INTEGER
+%token TOK_OR TOK_AND
+
+%type <integer> opt_ri_occurrence
+%type <item> advice_item
+%type <list> advice_item_list bitmap_sublist bitmap_target_list generic_target_list
+%type <list> index_target_list join_order_target_list
+%type <list> opt_partition simple_target_list
+%type <str> identifier opt_plan_name
+%type <target> generic_sublist join_order_sublist
+%type <target> relation_identifier
+%type <itarget> bitmap_target_item index_name
+
+%start parse_toplevel
+
+/* Grammar follows */
+%%
+
+/* Start symbol: hand the complete list of advice items back via *result. */
+parse_toplevel: advice_item_list
+               {
+                       (void) yynerrs;                         /* suppress compiler warning */
+                       *result = $1;
+               }
+       ;
+
+/*
+ * Zero or more advice items, collected in input order. The empty production
+ * yields NIL, so an empty advice string parses to an empty list.
+ */
+advice_item_list: advice_item_list advice_item
+               { $$ = lappend($1, $2); }
+       |
+               { $$ = NIL; }
+       ;
+
+/*
+ * One piece of advice: a tag followed by a parenthesized target list.
+ *
+ * The scanner classifies tags into families, each of which has its own
+ * target list syntax. For the families that cover more than one tag, the
+ * tag text carried by the token decides which PGPA_TAG_* value is stored.
+ */
+advice_item: TOK_TAG_JOIN_ORDER '(' join_order_target_list ')'
+               {
+                       $$ = palloc0_object(pgpa_advice_item);
+                       $$->tag = PGPA_TAG_JOIN_ORDER;
+                       $$->targets = $3;
+               }
+       | TOK_TAG_INDEX '(' index_target_list ')'
+               {
+                       $$ = palloc0_object(pgpa_advice_item);
+                       if (strcmp($1, "index_only_scan") == 0)
+                               $$->tag = PGPA_TAG_INDEX_ONLY_SCAN;
+                       else if (strcmp($1, "index_scan") == 0)
+                               $$->tag = PGPA_TAG_INDEX_SCAN;
+                       else
+                               elog(ERROR, "tag parsing failed: %s", $1);
+                       $$->targets = $3;
+               }
+       | TOK_TAG_BITMAP '(' bitmap_target_list ')'
+               {
+                       $$ = palloc0_object(pgpa_advice_item);
+                       $$->tag = PGPA_TAG_BITMAP_HEAP_SCAN;
+                       $$->targets = $3;
+               }
+       | TOK_TAG_SIMPLE '(' simple_target_list ')'
+               {
+                       $$ = palloc0_object(pgpa_advice_item);
+                       if (strcmp($1, "no_gather") == 0)
+                               $$->tag = PGPA_TAG_NO_GATHER;
+                       else if (strcmp($1, "seq_scan") == 0)
+                               $$->tag = PGPA_TAG_SEQ_SCAN;
+                       else if (strcmp($1, "tid_scan") == 0)
+                               $$->tag = PGPA_TAG_TID_SCAN;
+                       else
+                               elog(ERROR, "tag parsing failed: %s", $1);
+                       $$->targets = $3;
+               }
+       | TOK_TAG_GENERIC '(' generic_target_list ')'
+               {
+                       bool    fail;
+
+                       $$ = palloc0_object(pgpa_advice_item);
+                       $$->tag = pgpa_parse_advice_tag($1, &fail);
+                       if (fail)
+                       {
+                               pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+                                                        "unrecognized advice tag");
+                       }
+
+                       /*
+                        * Every FOREIGN_JOIN target must be a sub-list naming at
+                        * least two relations. A bare identifier, a one-element
+                        * sub-list, and an empty sub-list (which the grammar for
+                        * generic_sublist permits) are all rejected.
+                        */
+                       if ($$->tag == PGPA_TAG_FOREIGN_JOIN)
+                       {
+                               foreach_ptr(pgpa_advice_target, target, $3)
+                               {
+                                       if (target->ttype == PGPA_TARGET_IDENTIFIER ||
+                                               list_length(target->children) < 2)
+                                               pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+                                                                        "FOREIGN_JOIN targets must contain more than one relation identifier");
+                               }
+                       }
+
+                       $$->targets = $3;
+               }
+       ;
+
+/*
+ * A relation identifier: an alias name, optionally followed by an
+ * occurrence number ('#'), a partition name ('/'), and a plan name ('@'),
+ * in that order. Parts that are omitted keep the zeroed values supplied by
+ * palloc0_object(), except that the occurrence number defaults to 1.
+ */
+relation_identifier: identifier opt_ri_occurrence opt_partition opt_plan_name
+               {
+                       $$ = palloc0_object(pgpa_advice_target);
+                       $$->ttype = PGPA_TARGET_IDENTIFIER;
+                       $$->rid.alias_name = $1;
+                       $$->rid.occurrence = $2;
+                       /* opt_partition yields (schema, name), (name), or NIL. */
+                       if (list_length($3) == 2)
+                       {
+                               $$->rid.partnsp = linitial($3);
+                               $$->rid.partrel = lsecond($3);
+                       }
+                       else if ($3 != NIL)
+                               $$->rid.partrel = linitial($3);
+                       $$->rid.plan_name = $4;
+               }
+       ;
+
+/*
+ * An index name, optionally schema-qualified. In the unqualified form,
+ * indnamespace keeps the zeroed value supplied by palloc0_object().
+ */
+index_name: identifier
+               {
+                       $$ = palloc0_object(pgpa_index_target);
+                       $$->itype = PGPA_INDEX_NAME;
+                       $$->indname = $1;
+               }
+       | identifier '.' identifier
+               {
+                       $$ = palloc0_object(pgpa_index_target);
+                       $$->itype = PGPA_INDEX_NAME;
+                       $$->indnamespace = $1;
+                       $$->indname = $3;
+               }
+       ;
+
+/*
+ * Optional occurrence number for a relation identifier, written as '#'
+ * followed by an integer. A value that is not positive is reported through
+ * pgpa_yyerror(), after which the value is still passed along as-is.
+ */
+opt_ri_occurrence:
+       '#' TOK_INTEGER
+               {
+                       if ($2 <= 0)
+                               pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+                                                        "only positive occurrence numbers are permitted");
+                       $$ = $2;
+               }
+       |
+               {
+                       /* The default occurrence number is 1. */
+                       $$ = 1;
+               }
+       ;
+
+/*
+ * An identifier is either an ordinary identifier token or any of the advice
+ * tag tokens, so that a word which happens to be an advice tag can still be
+ * used as a name. All of these tokens carry a string value, which is passed
+ * through unchanged by the default action.
+ */
+identifier: TOK_IDENT
+       | TOK_TAG_JOIN_ORDER
+       | TOK_TAG_INDEX
+       | TOK_TAG_BITMAP
+       | TOK_TAG_SIMPLE
+       | TOK_TAG_GENERIC
+       ;
+
+/*
+ * When generating advice, we always schema-qualify the partition name, but
+ * when parsing advice, we accept a specification that lacks one.
+ *
+ * The semantic value is a two-element list (schema, partition), a
+ * one-element list (partition), or NIL when no partition is specified.
+ *
+ * NOTE(review): these alternatives accept only TOK_IDENT, not the broader
+ * "identifier" nonterminal, so TOK_TAG_* tokens are not accepted as schema
+ * or partition names here; confirm that this is intended.
+ */
+opt_partition:
+       '/' TOK_IDENT '.' TOK_IDENT
+               { $$ = list_make2($2, $4); }
+       | '/' TOK_IDENT
+               { $$ = list_make1($2); }
+       |
+               { $$ = NIL; }
+       ;
+
+/*
+ * Optional plan name, written "@name". The semantic value is the name, or
+ * NULL when no plan name is specified.
+ */
+opt_plan_name:
+       '@' TOK_IDENT
+               { $$ = $2; }
+       |
+               { $$ = NULL; }
+       ;
+
+/*
+ * Zero or more (relation_identifier, bitmap_target_item) pairs. Each item is
+ * attached to its relation identifier via itarget, and the relation
+ * identifier is appended to the list. The empty list is NIL.
+ */
+bitmap_target_list: bitmap_target_list relation_identifier bitmap_target_item
+               {
+                       $2->itarget = $3;
+                       $$ = lappend($1, $2);
+               }
+       |
+               { $$ = NIL; }
+       ;
+
+/*
+ * One bitmap target: either a single index name, or TOK_OR / TOK_AND applied
+ * to a parenthesized bitmap_sublist. The combining forms build a
+ * PGPA_INDEX_OR or PGPA_INDEX_AND node whose children are the sublist items;
+ * since a sublist is itself made of bitmap_target_items, these can nest.
+ */
+bitmap_target_item: index_name
+               { $$ = $1; }
+       | TOK_OR '(' bitmap_sublist ')'
+               {
+                       $$ = palloc0_object(pgpa_index_target);
+                       $$->itype = PGPA_INDEX_OR;
+                       $$->children = $3;
+               }
+       | TOK_AND '(' bitmap_sublist ')'
+               {
+                       $$ = palloc0_object(pgpa_index_target);
+                       $$->itype = PGPA_INDEX_AND;
+                       $$->children = $3;
+               }
+       ;
+
+/*
+ * One or more bitmap_target_items, collected into a list. Unlike the
+ * top-level target lists, this list cannot be empty.
+ */
+bitmap_sublist: bitmap_sublist bitmap_target_item
+               { $$ = lappend($1, $2); }
+       | bitmap_target_item
+               { $$ = list_make1($1); }
+       ;
+
+/*
+ * Zero or more targets, each either a relation identifier or a
+ * parenthesized sublist (see generic_sublist). The empty list is NIL.
+ */
+generic_target_list: generic_target_list relation_identifier
+               { $$ = lappend($1, $2); }
+       | generic_target_list generic_sublist
+               { $$ = lappend($1, $2); }
+       |
+               { $$ = NIL; }
+       ;
+
+/*
+ * A parenthesized generic_target_list, wrapped in a
+ * PGPA_TARGET_ORDERED_LIST target whose children are the list members.
+ */
+generic_sublist: '(' generic_target_list ')'
+               {
+                       $$ = palloc0_object(pgpa_advice_target);
+                       $$->ttype = PGPA_TARGET_ORDERED_LIST;
+                       $$->children = $2;
+               }
+       ;
+
+/*
+ * Zero or more (relation_identifier, index_name) pairs. The index name is
+ * attached to its relation identifier via itarget, and the relation
+ * identifier is appended to the list. The empty list is NIL.
+ */
+index_target_list:
+         index_target_list relation_identifier index_name
+               {
+                       $2->itarget = $3;
+                       $$ = lappend($1, $2);
+               }
+       |
+               { $$ = NIL; }
+       ;
+
+/*
+ * Zero or more join-order targets, each either a relation identifier or a
+ * sublist (see join_order_sublist). The empty list is NIL.
+ */
+join_order_target_list: join_order_target_list relation_identifier
+               { $$ = lappend($1, $2); }
+       | join_order_target_list join_order_sublist
+               { $$ = lappend($1, $2); }
+       |
+               { $$ = NIL; }
+       ;
+
+/*
+ * A join-order sublist. Parentheses enclose a nested join_order_target_list
+ * and produce a PGPA_TARGET_ORDERED_LIST; braces enclose a
+ * simple_target_list (relation identifiers only, no nesting) and produce a
+ * PGPA_TARGET_UNORDERED_LIST.
+ */
+join_order_sublist:
+       '(' join_order_target_list ')'
+               {
+                       $$ = palloc0_object(pgpa_advice_target);
+                       $$->ttype = PGPA_TARGET_ORDERED_LIST;
+                       $$->children = $2;
+               }
+       | '{' simple_target_list '}'
+               {
+                       $$ = palloc0_object(pgpa_advice_target);
+                       $$->ttype = PGPA_TARGET_UNORDERED_LIST;
+                       $$->children = $2;
+               }
+       ;
+
+/*
+ * Zero or more relation identifiers with no sublists permitted. The empty
+ * list is NIL.
+ */
+simple_target_list: simple_target_list relation_identifier
+               { $$ = lappend($1, $2); }
+       |
+               { $$ = NIL; }
+       ;
+
+%%
+
+/*
+ * Parse an advice_string and return the resulting list of pgpa_advice_item
+ * objects.
+ *
+ * If a parse error is reported, the return value is NULL and *error_p is set
+ * to the error message. Otherwise, *error_p is set to NULL and the parsed
+ * list is returned.
+ *
+ * Callers must test *error_p, not the return value, to detect failure: the
+ * returned list can legitimately be NIL (which is NULL) when parsing succeeds
+ * but yields no advice items.
+ */
+List *
+pgpa_parse(const char *advice_string, char **error_p)
+{
+       yyscan_t        scanner;
+       List       *result = NIL;
+       char       *error = NULL;
+
+       /*
+        * result is initialized above so that we never return an indeterminate
+        * pointer should the parser finish without reporting an error and
+        * without storing a result.
+        */
+       pgpa_scanner_init(advice_string, &scanner);
+       pgpa_yyparse(&result, &error, scanner);
+       pgpa_scanner_finish(scanner);
+
+       /* Any reported error takes precedence over whatever was parsed. */
+       if (error != NULL)
+       {
+               *error_p = error;
+               return NULL;
+       }
+
+       *error_p = NULL;
+       return result;
+}
diff --git a/contrib/pg_plan_advice/pgpa_planner.c b/contrib/pg_plan_advice/pgpa_planner.c

new file mode 100644 (file)

index 0000000..767facc
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_planner.c
@@ -0,0 +1,1706 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_planner.c
+ *       planner hooks
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_planner.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pg_plan_advice.h"
+#include "pgpa_collector.h"
+#include "pgpa_identifier.h"
+#include "pgpa_output.h"
+#include "pgpa_planner.h"
+#include "pgpa_trove.h"
+#include "pgpa_walker.h"
+
+#include "common/hashfn_unstable.h"
+#include "nodes/makefuncs.h"
+#include "optimizer/extendplan.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/plancat.h"
+#include "optimizer/planner.h"
+#include "parser/parsetree.h"
+#include "utils/lsyscache.h"
+
+#ifdef USE_ASSERT_CHECKING
+
+/*
+ * When assertions are enabled, we try generating relation identifiers during
+ * planning, saving them in a hash table, and then cross-checking them against
+ * the ones generated after planning is complete.
+ *
+ * pgpa_ri_checker_key is the key of that hash table. Two keys are equal when
+ * their rti values match and their plan names are either both NULL or equal
+ * strings (see pgpa_ri_checker_compare_key).
+ */
+typedef struct pgpa_ri_checker_key
+{
+       /* Plan name; may be NULL. */
+       char       *plan_name;
+       /* Range table index (presumably within the named plan -- confirm). */
+       Index           rti;
+} pgpa_ri_checker_key;
+
+/*
+ * Element type of the cross-checking hash table instantiated from
+ * lib/simplehash.h below.
+ */
+typedef struct pgpa_ri_checker
+{
+       /* Hash key; named by SH_KEY below. */
+       pgpa_ri_checker_key key;
+       /* Entry status field that simplehash.h requires in its element type. */
+       uint32          status;
+       /* Presumably the identifier string saved during planning -- confirm. */
+       const char *rid_string;
+} pgpa_ri_checker;
+
+static uint32 pgpa_ri_checker_hash_key(pgpa_ri_checker_key key);
+
+/*
+ * Equality function for pgpa_ri_checker_key.
+ *
+ * Keys match when their range table indexes are equal and their plan names
+ * are either both NULL or compare equal as strings.
+ */
+static inline bool
+pgpa_ri_checker_compare_key(pgpa_ri_checker_key a, pgpa_ri_checker_key b)
+{
+       /* Differing range table indexes can never match. */
+       if (a.rti != b.rti)
+               return false;
+
+       /* If either plan name is NULL, they match only if both are NULL. */
+       if (a.plan_name == NULL || b.plan_name == NULL)
+               return (a.plan_name == b.plan_name);
+
+       /* Both plan names are present, so compare the strings. */
+       return strcmp(a.plan_name, b.plan_name) == 0;
+}
+
+/*
+ * Instantiate a simplehash.h hash table with prefix "pgpa_ri_check", whose
+ * elements are pgpa_ri_checker structs keyed by their "key" member. Both the
+ * declarations and the definitions are generated here, as static inline
+ * functions.
+ */
+#define SH_PREFIX                      pgpa_ri_check
+#define SH_ELEMENT_TYPE                pgpa_ri_checker
+#define SH_KEY_TYPE                    pgpa_ri_checker_key
+#define SH_KEY                         key
+#define SH_HASH_KEY(tb, key)   pgpa_ri_checker_hash_key(key)
+#define        SH_EQUAL(tb, a, b)      pgpa_ri_checker_compare_key(a, b)
+#define SH_SCOPE                       static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+#endif
+
+/*
+ * Private per-planner-invocation state. It is created in
+ * pgpa_planner_setup() and stored with SetPlannerGlobalExtensionState()
+ * under planner_extension_id, from where the other hooks fetch it.
+ */
+typedef struct pgpa_planner_state
+{
+       /* ExplainState passed to pgpa_planner_setup(); may be NULL. */
+       ExplainState *explain_state;
+       /* Trove built from the supplied advice, or NULL if there is none. */
+       pgpa_trove *trove;
+       /* NOTE(review): not assigned by the setup code shown here -- confirm use. */
+       MemoryContext trove_cxt;
+
+#ifdef USE_ASSERT_CHECKING
+       /* Hash table used for relation identifier cross-checking. */
+       pgpa_ri_check_hash *ri_check_hash;
+#endif
+} pgpa_planner_state;
+
+/*
+ * Private per-joinrel state. It is built by pgpa_get_join_state() and
+ * attached to the joinrel with SetRelOptInfoExtensionState(). A state whose
+ * join_indexes and rel_indexes are both NULL records that no advice is
+ * relevant to the joinrel.
+ */
+typedef struct pgpa_join_state
+{
+       /* Most-recently-considered outer rel. */
+       RelOptInfo *outerrel;
+
+       /* Most-recently-considered inner rel. */
+       RelOptInfo *innerrel;
+
+       /*
+        * Array of relation identifiers for all members of this joinrel, with
+        * outerrel identifiers before innerrel identifiers.
+        */
+       pgpa_identifier *rids;
+
+       /* Number of outer rel identifiers. */
+       int                     outer_count;
+
+       /* Number of inner rel identifiers. */
+       int                     inner_count;
+
+       /*
+        * Trove lookup results.
+        *
+        * join_entries and rel_entries are arrays of entries, and join_indexes
+        * and rel_indexes are the integer offsets within those arrays of entries
+        * potentially relevant to us. The "join" fields correspond to a lookup
+        * using PGPA_TROVE_LOOKUP_JOIN and the "rel" fields to a lookup using
+        * PGPA_TROVE_LOOKUP_REL.
+        */
+       pgpa_trove_entry *join_entries;
+       Bitmapset  *join_indexes;
+       pgpa_trove_entry *rel_entries;
+       Bitmapset  *rel_indexes;
+} pgpa_join_state;
+
+/*
+ * Saved hook values: whatever was installed in each hook before
+ * pgpa_planner_install_hooks() replaced it with our own function.
+ */
+static get_relation_info_hook_type prev_get_relation_info = NULL;
+static join_path_setup_hook_type prev_join_path_setup = NULL;
+static joinrel_setup_hook_type prev_joinrel_setup = NULL;
+static planner_setup_hook_type prev_planner_setup = NULL;
+static planner_shutdown_hook_type prev_planner_shutdown = NULL;
+
+/*
+ * Other global variables.
+ *
+ * planner_extension_id is the ID returned by GetPlannerExtensionId(); it
+ * stays -1 until pgpa_planner_install_hooks() runs.
+ */
+static int     planner_extension_id = -1;
+
+/* Function prototypes. */
+static void pgpa_get_relation_info(PlannerInfo *root,
+                                                                  Oid relationObjectId,
+                                                                  bool inhparent,
+                                                                  RelOptInfo *rel);
+static void pgpa_joinrel_setup(PlannerInfo *root,
+                                                          RelOptInfo *joinrel,
+                                                          RelOptInfo *outerrel,
+                                                          RelOptInfo *innerrel,
+                                                          SpecialJoinInfo *sjinfo,
+                                                          List *restrictlist);
+static void pgpa_join_path_setup(PlannerInfo *root,
+                                                                RelOptInfo *joinrel,
+                                                                RelOptInfo *outerrel,
+                                                                RelOptInfo *innerrel,
+                                                                JoinType jointype,
+                                                                JoinPathExtraData *extra);
+static void pgpa_planner_setup(PlannerGlobal *glob, Query *parse,
+                                                          const char *query_string,
+                                                          double *tuple_fraction,
+                                                          ExplainState *es);
+static void pgpa_planner_shutdown(PlannerGlobal *glob, Query *parse,
+                                                                 const char *query_string, PlannedStmt *pstmt);
+static void pgpa_planner_apply_joinrel_advice(uint64 *pgs_mask_p,
+                                                                                         char *plan_name,
+                                                                                         pgpa_join_state *pjs);
+static void pgpa_planner_apply_join_path_advice(JoinType jointype,
+                                                                                               uint64 *pgs_mask_p,
+                                                                                               char *plan_name,
+                                                                                               pgpa_join_state *pjs);
+static void pgpa_planner_apply_scan_advice(RelOptInfo *rel,
+                                                                                  pgpa_trove_entry *scan_entries,
+                                                                                  Bitmapset *scan_indexes,
+                                                                                  pgpa_trove_entry *rel_entries,
+                                                                                  Bitmapset *rel_indexes);
+static uint64 pgpa_join_strategy_mask_from_advice_tag(pgpa_advice_tag_type tag);
+static bool pgpa_join_order_permits_join(int outer_count, int inner_count,
+                                                                                pgpa_identifier *rids,
+                                                                                pgpa_trove_entry *entry);
+static bool pgpa_join_method_permits_join(int outer_count, int inner_count,
+                                                                                 pgpa_identifier *rids,
+                                                                                 pgpa_trove_entry *entry,
+                                                                                 bool *restrict_method);
+static bool pgpa_opaque_join_permits_join(int outer_count, int inner_count,
+                                                                                 pgpa_identifier *rids,
+                                                                                 pgpa_trove_entry *entry,
+                                                                                 bool *restrict_method);
+
+static List *pgpa_planner_append_feedback(List *list, pgpa_trove *trove,
+                                                                                 pgpa_trove_lookup_type type,
+                                                                                 pgpa_identifier *rt_identifiers,
+                                                                                 pgpa_plan_walker_context *walker);
+
+static inline void pgpa_ri_checker_save(pgpa_planner_state *pps,
+                                                                               PlannerInfo *root,
+                                                                               RelOptInfo *rel);
+static void pgpa_ri_checker_validate(pgpa_planner_state *pps,
+                                                                        PlannedStmt *pstmt);
+
+/*
+ * Install planner-related hooks.
+ *
+ * We first obtain the extension ID under which our private planner state is
+ * stored. Then, for each hook we use, we remember whatever function was
+ * previously installed and put our own function in its place.
+ */
+void
+pgpa_planner_install_hooks(void)
+{
+       /* Get the ID used to store and fetch our private state. */
+       planner_extension_id = GetPlannerExtensionId("pg_plan_advice");
+
+       /* Whole-query setup and shutdown. */
+       prev_planner_setup = planner_setup_hook;
+       planner_setup_hook = pgpa_planner_setup;
+
+       prev_planner_shutdown = planner_shutdown_hook;
+       planner_shutdown_hook = pgpa_planner_shutdown;
+
+       /* Per-relation information. */
+       prev_get_relation_info = get_relation_info_hook;
+       get_relation_info_hook = pgpa_get_relation_info;
+
+       /* Per-joinrel and per-join-path setup. */
+       prev_joinrel_setup = joinrel_setup_hook;
+       joinrel_setup_hook = pgpa_joinrel_setup;
+
+       prev_join_path_setup = join_path_setup_hook;
+       join_path_setup_hook = pgpa_join_path_setup;
+}
+
+/*
+ * Hook function for get_relation_info().
+ *
+ * We can apply scan advice at this point, and we also use this as an
+ * opportunity to do range-table identifier cross-checking in assert-enabled
+ * builds.
+ *
+ * XXX: We currently emit useless advice like NO_GATHER("*RESULT*") for trivial
+ * queries. The advice is useless because get_relation_info isn't called for
+ * non-relation RTEs. We should either suppress the advice in such cases, or
+ * add a hook that can apply it.
+ */
+static void
+pgpa_get_relation_info(PlannerInfo *root, Oid relationObjectId,
+                                          bool inhparent, RelOptInfo *rel)
+{
+       pgpa_planner_state *pps;
+
+       /* Our private state exists only if pgpa_planner_setup() created it. */
+       pps = GetPlannerGlobalExtensionState(root->glob, planner_extension_id);
+
+       if (pps != NULL)
+       {
+               /* Record what is needed for range table identifier cross-checks. */
+               pgpa_ri_checker_save(pps, root, rel);
+
+               /* Look for applicable advice only if advice was supplied. */
+               if (pps->trove != NULL)
+               {
+                       pgpa_identifier ident;
+                       pgpa_trove_result scan_result;
+                       pgpa_trove_result rel_result;
+
+                       /* Look up both scan advice and general rel advice. */
+                       pgpa_compute_identifier_by_rti(root, rel->relid, &ident);
+                       pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_SCAN, 1, &ident,
+                                                         &scan_result);
+                       pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_REL, 1, &ident,
+                                                         &rel_result);
+
+                       /* Apply whatever either lookup turned up. */
+                       if (scan_result.indexes != NULL || rel_result.indexes != NULL)
+                               pgpa_planner_apply_scan_advice(rel,
+                                                                                          scan_result.entries,
+                                                                                          scan_result.indexes,
+                                                                                          rel_result.entries,
+                                                                                          rel_result.indexes);
+               }
+       }
+
+       /* Chain to whichever hook was installed before ours. */
+       if (prev_get_relation_info != NULL)
+               prev_get_relation_info(root, relationObjectId, inhparent, rel);
+}
+
+/*
+ * Search for advice pertaining to a proposed join.
+ *
+ * Returns the pgpa_join_state for this joinrel, configured for the given
+ * outerrel and innerrel, or NULL if no advice is potentially relevant. The
+ * state is cached on the joinrel, so the trove is searched only the first
+ * time a given joinrel is seen; a cached state with no relevant entries acts
+ * as a negative cache entry.
+ */
+static pgpa_join_state *
+pgpa_get_join_state(PlannerInfo *root, RelOptInfo *joinrel,
+                                       RelOptInfo *outerrel, RelOptInfo *innerrel)
+{
+       pgpa_planner_state *pps;
+       pgpa_join_state *pjs;
+       pgpa_trove_result tresult;
+       bool            is_new;
+       int                     nrids;
+
+       /*
+        * Without private state or without a trove, no advice applies to this
+        * query, hence none to this joinrel.
+        */
+       pps = GetPlannerGlobalExtensionState(root->glob, planner_extension_id);
+       if (pps == NULL || pps->trove == NULL)
+               return NULL;
+
+       /* Have we already associated a pgpa_join_state with this joinrel? */
+       pjs = GetRelOptInfoExtensionState(joinrel, planner_extension_id);
+       is_new = (pjs == NULL);
+
+       if (is_new)
+       {
+               /*
+                * Allocate a new state. Trove keys will not get built for RTE_JOIN
+                * RTEs, so the identifier array may end up larger than needed; it's
+                * not worth trying to compute a perfectly accurate count here.
+                */
+               pjs = palloc0_object(pgpa_join_state);
+               pjs->rids = palloc_array(pgpa_identifier,
+                                                                bms_num_members(joinrel->relids));
+       }
+       else if (pjs->join_indexes == NULL && pjs->rel_indexes == NULL)
+       {
+               /*
+                * Negative cache entry: an earlier search found no potentially
+                * relevant advice, so there is no point in searching again.
+                */
+               return NULL;
+       }
+       else if (pjs->outerrel == outerrel && pjs->innerrel == innerrel)
+       {
+               /* Same outer and inner rel as last time; nothing to update. */
+               /* XXX. Does this need to do something different under GEQO? */
+               return pjs;
+       }
+
+       /*
+        * The state is either brand new or must be reconfigured for a different
+        * outerrel and innerrel. The required array size can't change, so the
+        * existing identifier array is simply overwritten, with the outer rel's
+        * identifiers first and the inner rel's identifiers after them.
+        */
+       pjs->outerrel = outerrel;
+       pjs->innerrel = innerrel;
+       pjs->outer_count =
+               pgpa_compute_identifiers_by_relids(root, outerrel->relids, pjs->rids);
+       pjs->inner_count =
+               pgpa_compute_identifiers_by_relids(root, innerrel->relids,
+                                                                                  pjs->rids + pjs->outer_count);
+
+       /*
+        * The trove lookup returns the same results for every outerrel/innerrel
+        * combination, so a pre-existing state needs no further work.
+        */
+       if (!is_new)
+               return pjs;
+
+       nrids = pjs->outer_count + pjs->inner_count;
+
+       /* Find join entries. */
+       pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_JOIN, nrids,
+                                         pjs->rids, &tresult);
+       pjs->join_entries = tresult.entries;
+       pjs->join_indexes = tresult.indexes;
+
+       /* Find rel entries. */
+       pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_REL, nrids,
+                                         pjs->rids, &tresult);
+       pjs->rel_entries = tresult.entries;
+       pjs->rel_indexes = tresult.indexes;
+
+       /* Now that the new pgpa_join_state is fully valid, save a pointer. */
+       SetRelOptInfoExtensionState(joinrel, planner_extension_id, pjs);
+
+       /*
+        * If nothing relevant was found, report that by returning NULL. The
+        * state just saved remains attached to the joinrel as a negative cache
+        * entry, so that later calls for this joinrel return NULL quickly.
+        */
+       if (pjs->join_indexes == NULL && pjs->rel_indexes == NULL)
+               return NULL;
+
+       return pjs;
+}
+
+/*
+ * Enforce any provided advice that is relevant to any method of implementing
+ * this join.
+ *
+ * Although we're passed the outerrel and innerrel here, those are just
+ * whatever values happened to prompt the creation of this joinrel; they
+ * shouldn't really influence our choice of what advice to apply.
+ */
+static void
+pgpa_joinrel_setup(PlannerInfo *root, RelOptInfo *joinrel,
+                                  RelOptInfo *outerrel, RelOptInfo *innerrel,
+                                  SpecialJoinInfo *sjinfo, List *restrictlist)
+{
+       pgpa_join_state *jstate;
+
+       /* We expect a joinrel to cover more than one relation. */
+       Assert(bms_membership(joinrel->relids) == BMS_MULTIPLE);
+
+       /* Apply joinrel-level advice, if any is relevant to this join. */
+       jstate = pgpa_get_join_state(root, joinrel, outerrel, innerrel);
+       if (jstate != NULL)
+               pgpa_planner_apply_joinrel_advice(&joinrel->pgs_mask,
+                                                                                 root->plan_name,
+                                                                                 jstate);
+
+       /* Chain to whichever hook was installed before ours. */
+       if (prev_joinrel_setup != NULL)
+               prev_joinrel_setup(root, joinrel, outerrel, innerrel,
+                                                  sjinfo, restrictlist);
+}
+
+/*
+ * Enforce any provided advice that is relevant to this particular method of
+ * implementing this particular join.
+ */
+static void
+pgpa_join_path_setup(PlannerInfo *root, RelOptInfo *joinrel,
+                                        RelOptInfo *outerrel, RelOptInfo *innerrel,
+                                        JoinType jointype, JoinPathExtraData *extra)
+{
+       pgpa_join_state *jstate;
+
+       /* We expect a joinrel to cover more than one relation. */
+       Assert(bms_membership(joinrel->relids) == BMS_MULTIPLE);
+
+       /* Apply join-path-level advice, if any is relevant to this join. */
+       jstate = pgpa_get_join_state(root, joinrel, outerrel, innerrel);
+       if (jstate != NULL)
+               pgpa_planner_apply_join_path_advice(jointype,
+                                                                                       &extra->pgs_mask,
+                                                                                       root->plan_name,
+                                                                                       jstate);
+
+       /* Chain to whichever hook was installed before ours. */
+       if (prev_join_path_setup != NULL)
+               prev_join_path_setup(root, joinrel, outerrel, innerrel,
+                                                        jointype, extra);
+}
+
+/*
+ * Prepare advice for use by a query.
+ */
+static void
+pgpa_planner_setup(PlannerGlobal *glob, Query *parse, const char *query_string,
+                                  double *tuple_fraction, ExplainState *es)
+{
+       pgpa_trove *trove = NULL;
+       pgpa_planner_state *pps;
+       char       *error;
+       bool            needs_pps = false;
+
+       /*
+        * If any advice was provided, build a trove of advice for use during
+        * planning.
+        */
+       if (pg_plan_advice_advice != NULL && pg_plan_advice_advice[0] != '\0')
+       {
+               List       *advice_items;
+
+               /*
+                * Parsing shouldn't fail here, because we must have previously parsed
+                * successfully in pg_plan_advice_advice_check_hook, but if it does,
+                * emit a warning.
+                */
+               advice_items = pgpa_parse(pg_plan_advice_advice, &error);
+               if (error)
+                       elog(WARNING, "could not parse advice: %s", error);
+
+               /*
+                * It's possible that the advice string was non-empty but contained no
+                * actual advice, e.g. it was all whitespace.
+                */
+               if (advice_items != NIL)
+               {
+                       trove = pgpa_build_trove(advice_items);
+                       needs_pps = true;
+               }
+       }
+
+#ifdef USE_ASSERT_CHECKING
+
+       /*
+        * If asserts are enabled, always build a private state object for
+        * cross-checks.
+        */
+       needs_pps = true;
+#endif
+
+       /* Initialize and store private state, if required. */
+       if (needs_pps)
+       {
+               pps = palloc0_object(pgpa_planner_state);
+               pps->explain_state = es;
+               pps->trove = trove;
+#ifdef USE_ASSERT_CHECKING
+               pps->ri_check_hash =
+                       pgpa_ri_check_create(CurrentMemoryContext, 1024, NULL);
+#endif
+               SetPlannerGlobalExtensionState(glob, planner_extension_id, pps);
+       }
+}
+
+/*
+ * Carry out whatever work we want to do after planning is complete.
+ *
+ * Depending on configuration, this generates an advice string from the
+ * finished plan and hands it to the collector, and/or records feedback about
+ * the supplied advice for later display by EXPLAIN. Whatever is to be saved
+ * is appended to pstmt->extension_state under the name "pg_plan_advice".
+ * Finally, control is passed to any previously-installed
+ * planner_shutdown_hook.
+ */
+static void
+pgpa_planner_shutdown(PlannerGlobal *glob, Query *parse,
+                                         const char *query_string, PlannedStmt *pstmt)
+{
+       pgpa_planner_state *pps;
+       pgpa_trove *trove = NULL;
+       ExplainState *es = NULL;
+       pgpa_plan_walker_context walker = {0};  /* placate compiler */
+       bool            do_advice_feedback;
+       bool            do_collect_advice;
+       List       *pgpa_items = NIL;
+       pgpa_identifier *rt_identifiers = NULL;
+
+       /* Fetch our private state, set up by pgpa_planner_setup(). */
+       pps = GetPlannerGlobalExtensionState(glob, planner_extension_id);
+       if (pps != NULL)
+       {
+               trove = pps->trove;
+               es = pps->explain_state;
+       }
+
+       /* If at least one collector is enabled, generate advice. */
+       do_collect_advice = (pg_plan_advice_local_collection_limit > 0 ||
+                                                pg_plan_advice_shared_collection_limit > 0);
+
+       /* If we applied advice, generate feedback. */
+       do_advice_feedback = (trove != NULL && es != NULL);
+
+       /* If either of the above apply, analyze the resulting PlannedStmt. */
+       if (do_collect_advice || do_advice_feedback)
+       {
+               pgpa_plan_walker(&walker, pstmt);
+               rt_identifiers = pgpa_create_identifiers_for_planned_stmt(pstmt);
+       }
+
+       /*
+        * If advice collection is enabled, put the advice in string form and send
+        * it to the collector.
+        */
+       if (do_collect_advice)
+       {
+               char       *advice_string;
+               StringInfoData buf;
+
+               /* Generate a textual advice string. */
+               initStringInfo(&buf);
+               pgpa_output_advice(&buf, &walker, rt_identifiers);
+               advice_string = buf.data;
+
+               /* If the advice string is empty, don't bother collecting it. */
+               if (advice_string[0] != '\0')
+                       pgpa_collect_advice(pstmt->queryId, query_string, advice_string);
+
+               /*
+                * If we've gone to the trouble of generating an advice string, and if
+                * we're inside EXPLAIN, save the string so we don't need to
+                * regenerate it.
+                */
+               if (es != NULL)
+                       pgpa_items = lappend(pgpa_items,
+                                                                makeDefElem("advice_string",
+                                                                                        (Node *) makeString(advice_string),
+                                                                                        -1));
+       }
+
+       /*
+        * If we are planning within EXPLAIN, make arrangements to allow EXPLAIN
+        * to tell the user what has happened with the provided advice.
+        *
+        * NB: If EXPLAIN is used on a prepared statement, planning will have
+        * already happened without recording these details. We could consider
+        * adding a GUC to cater to that scenario; or we could do this work all
+        * the time, but that seems like too much overhead.
+        */
+       if (do_advice_feedback)
+       {
+               List       *feedback = NIL;
+
+               /*
+                * Inject a Node-tree representation of all the trove-entry flags into
+                * the PlannedStmt.
+                */
+               feedback = pgpa_planner_append_feedback(feedback,
+                                                                                               trove,
+                                                                                               PGPA_TROVE_LOOKUP_SCAN,
+                                                                                               rt_identifiers, &walker);
+               feedback = pgpa_planner_append_feedback(feedback,
+                                                                                               trove,
+                                                                                               PGPA_TROVE_LOOKUP_JOIN,
+                                                                                               rt_identifiers, &walker);
+               feedback = pgpa_planner_append_feedback(feedback,
+                                                                                               trove,
+                                                                                               PGPA_TROVE_LOOKUP_REL,
+                                                                                               rt_identifiers, &walker);
+
+               pgpa_items = lappend(pgpa_items, makeDefElem("feedback",
+                                                                                                        (Node *) feedback,
+                                                                                                        -1));
+       }
+
+       /* Push whatever data we're saving into the PlannedStmt. */
+       if (pgpa_items != NIL)
+               pstmt->extension_state =
+                       lappend(pstmt->extension_state,
+                                       makeDefElem("pg_plan_advice", (Node *) pgpa_items, -1));
+
+       /*
+        * If assertions are enabled, cross-check the generated range table
+        * identifiers.
+        */
+       if (pps != NULL)
+               pgpa_ri_checker_validate(pps, pstmt);
+
+       /*
+        * Pass call to previous hook. Without this, any planner_shutdown_hook
+        * that was installed before ours would silently stop being called.
+        */
+       if (prev_planner_shutdown)
+               (*prev_planner_shutdown) (glob, parse, query_string, pstmt);
+}
+
+/*
+ * Enforce overall restrictions on a join relation that apply uniformly
+ * regardless of the choice of inner and outer rel.
+ *
+ * This handles the GATHER, GATHER_MERGE, NO_GATHER and PARTITIONWISE advice
+ * found in pjs->rel_entries at the positions listed in pjs->rel_indexes; any
+ * other advice tag found there raises an error. Each entry that contributes
+ * is flagged as partially or fully matched (and as conflicting, if the
+ * applicable advice disagrees), and any non-conflicting outcome is then
+ * applied to *pgs_mask_p, which is updated in place.
+ *
+ * plan_name is not currently used by this function.
+ */
+static void
+pgpa_planner_apply_joinrel_advice(uint64 *pgs_mask_p, char *plan_name,
+                                                                 pgpa_join_state *pjs)
+{
+       int                     i = -1;
+       int                     flags;
+       bool            gather_conflict = false;
+       uint64          gather_mask = 0;
+       Bitmapset  *gather_partial_match = NULL;
+       Bitmapset  *gather_full_match = NULL;
+       bool            partitionwise_conflict = false;
+       int                     partitionwise_outcome = 0;
+       Bitmapset  *partitionwise_partial_match = NULL;
+       Bitmapset  *partitionwise_full_match = NULL;
+
+       /* Iterate over all possibly-relevant advice. */
+       while ((i = bms_next_member(pjs->rel_indexes, i)) >= 0)
+       {
+               pgpa_trove_entry *entry = &pjs->rel_entries[i];
+               pgpa_itm_type itm;
+               bool            full_match = false;
+               uint64          my_gather_mask = 0;
+               int                     my_partitionwise_outcome = 0;   /* >0 yes, <0 no */
+
+               /*
+                * For GATHER and GATHER_MERGE, if the specified relations exactly
+                * match this joinrel, do whatever the advice says; otherwise, don't
+                * allow Gather or Gather Merge at this level. For NO_GATHER, there
+                * must be a single target relation which must be included in this
+                * joinrel, so just don't allow Gather or Gather Merge here, full
+                * stop.
+                */
+               if (entry->tag == PGPA_TAG_NO_GATHER)
+               {
+                       my_gather_mask = PGS_CONSIDER_NONPARTIAL;
+                       full_match = true;
+               }
+               else
+               {
+                       int                     total_count;
+
+                       total_count = pjs->outer_count + pjs->inner_count;
+                       itm = pgpa_identifiers_match_target(total_count, pjs->rids,
+                                                                                               entry->target);
+                       Assert(itm != PGPA_ITM_DISJOINT);
+
+                       if (itm == PGPA_ITM_EQUAL)
+                       {
+                               full_match = true;
+                               if (entry->tag == PGPA_TAG_PARTITIONWISE)
+                                       my_partitionwise_outcome = 1;
+                               else if (entry->tag == PGPA_TAG_GATHER)
+                                       my_gather_mask = PGS_GATHER;
+                               else if (entry->tag == PGPA_TAG_GATHER_MERGE)
+                                       my_gather_mask = PGS_GATHER_MERGE;
+                               else
+                                       elog(ERROR, "unexpected advice tag: %d",
+                                                (int) entry->tag);
+                       }
+                       else
+                       {
+                               if (entry->tag == PGPA_TAG_PARTITIONWISE)
+                               {
+                                       my_partitionwise_outcome = -1;
+                                       my_gather_mask = PGS_CONSIDER_NONPARTIAL;
+                               }
+                               else if (entry->tag == PGPA_TAG_GATHER ||
+                                                entry->tag == PGPA_TAG_GATHER_MERGE)
+                               {
+                                       my_partitionwise_outcome = -1;
+                                       my_gather_mask = PGS_CONSIDER_NONPARTIAL;
+                               }
+                               else
+                                       elog(ERROR, "unexpected advice tag: %d",
+                                                (int) entry->tag);
+                       }
+               }
+
+               /*
+                * If we set my_gather_mask up above, then we (1) make a note if the
+                * advice conflicted, (2) remember the mask value, and (3) remember
+                * whether this was a full or partial match.
+                */
+               if (my_gather_mask != 0)
+               {
+                       if (gather_mask != 0 && gather_mask != my_gather_mask)
+                               gather_conflict = true;
+                       gather_mask = my_gather_mask;
+                       if (full_match)
+                               gather_full_match = bms_add_member(gather_full_match, i);
+                       else
+                               gather_partial_match = bms_add_member(gather_partial_match, i);
+               }
+
+               /*
+                * Likewise, if we set my_partitionwise_outcome up above, then we (1)
+                * make a note if the advice conflicted, (2) remember what the desired
+                * outcome was, and (3) remember whether this was a full or partial
+                * match.
+                */
+               if (my_partitionwise_outcome != 0)
+               {
+                       if (partitionwise_outcome != 0 &&
+                               partitionwise_outcome != my_partitionwise_outcome)
+                               partitionwise_conflict = true;
+                       partitionwise_outcome = my_partitionwise_outcome;
+                       if (full_match)
+                               partitionwise_full_match =
+                                       bms_add_member(partitionwise_full_match, i);
+                       else
+                               partitionwise_partial_match =
+                                       bms_add_member(partitionwise_partial_match, i);
+               }
+       }
+
+       /*
+        * Mark every Gather-related piece of advice as partially matched, and if
+        * the set of targets exactly matched this relation, fully matched. If
+        * there was a conflict, mark them all as conflicting.
+        */
+       flags = PGPA_TE_MATCH_PARTIAL;
+       if (gather_conflict)
+               flags |= PGPA_TE_CONFLICTING;
+       pgpa_trove_set_flags(pjs->rel_entries, gather_partial_match, flags);
+       flags |= PGPA_TE_MATCH_FULL;
+       pgpa_trove_set_flags(pjs->rel_entries, gather_full_match, flags);
+
+       /* Likewise for partitionwise advice. */
+       flags = PGPA_TE_MATCH_PARTIAL;
+       if (partitionwise_conflict)
+               flags |= PGPA_TE_CONFLICTING;
+       pgpa_trove_set_flags(pjs->rel_entries, partitionwise_partial_match, flags);
+       flags |= PGPA_TE_MATCH_FULL;
+       pgpa_trove_set_flags(pjs->rel_entries, partitionwise_full_match, flags);
+
+       /* If there is a non-conflicting gather specification, enforce it. */
+       if (gather_mask != 0 && !gather_conflict)
+       {
+               *pgs_mask_p &=
+                       ~(PGS_GATHER | PGS_GATHER_MERGE | PGS_CONSIDER_NONPARTIAL);
+               *pgs_mask_p |= gather_mask;
+       }
+
+       /*
+        * If there is a non-conflicting partitionwise specification, enforce it.
+        *
+        * To force a partitionwise join, we disable all the ordinary means of
+        * performing a join, and instead allow only Append and MergeAppend paths
+        * here. To prevent one, we just disable Append and MergeAppend.  Note
+        * that we must not unset PGS_CONSIDER_PARTITIONWISE even when we don't
+        * want a partitionwise join here, because we might want one at a higher
+        * level that is constructed using paths from this level.
+        */
+       if (partitionwise_outcome != 0 && !partitionwise_conflict)
+       {
+               if (partitionwise_outcome > 0)
+                       *pgs_mask_p = (*pgs_mask_p & ~PGS_JOIN_ANY) |
+                               PGS_APPEND | PGS_MERGE_APPEND | PGS_CONSIDER_PARTITIONWISE;
+               else
+                       *pgs_mask_p &= ~(PGS_APPEND | PGS_MERGE_APPEND);
+       }
+}
+
+/*
+ * Enforce restrictions on the join order or join method.
+ *
+ * Note that, although it is possible to view PARTITIONWISE advice as
+ * controlling the join method, we can't enforce it here, because the code
+ * path where this executes only deals with join paths that are built directly
+ * from a single outer path and a single inner path.
+ *
+ * jointype is the join type the planner is currently considering, and pjs
+ * describes the proposed join together with the possibly-relevant advice
+ * (pjs->join_entries, at the positions listed in pjs->join_indexes). On
+ * return, *pgs_mask_p has been zeroed if the advice rejects this join
+ * outright, or has had its PGS_JOIN_ANY bits replaced by the advised join
+ * strategy if the advice dictates a join method; otherwise it is left
+ * unchanged. Trove entries are flagged as conflicting where the applicable
+ * advice contradicts itself.
+ *
+ * plan_name is not currently used by this function.
+ */
+static void
+pgpa_planner_apply_join_path_advice(JoinType jointype, uint64 *pgs_mask_p,
+                                                                       char *plan_name,
+                                                                       pgpa_join_state *pjs)
+{
+       int                     i = -1;
+       Bitmapset  *jo_permit_indexes = NULL;
+       Bitmapset  *jo_deny_indexes = NULL;
+       Bitmapset  *jm_indexes = NULL;
+       bool            jm_conflict = false;
+       uint64          join_mask = 0;  /* same width as *pgs_mask_p */
+
+       /* Iterate over all possibly-relevant advice. */
+       while ((i = bms_next_member(pjs->join_indexes, i)) >= 0)
+       {
+               pgpa_trove_entry *entry = &pjs->join_entries[i];
+               uint64          my_join_mask;
+
+               /* Handle join order advice. */
+               if (entry->tag == PGPA_TAG_JOIN_ORDER)
+               {
+                       if (pgpa_join_order_permits_join(pjs->outer_count,
+                                                                                        pjs->inner_count,
+                                                                                        pjs->rids,
+                                                                                        entry))
+                               jo_permit_indexes = bms_add_member(jo_permit_indexes, i);
+                       else
+                               jo_deny_indexes = bms_add_member(jo_deny_indexes, i);
+                       continue;
+               }
+
+               /* Handle join strategy advice. */
+               my_join_mask = pgpa_join_strategy_mask_from_advice_tag(entry->tag);
+               if (my_join_mask != 0)
+               {
+                       bool            permit;
+                       bool            restrict_method;
+
+                       if (entry->tag == PGPA_TAG_FOREIGN_JOIN)
+                               permit = pgpa_opaque_join_permits_join(pjs->outer_count,
+                                                                                                          pjs->inner_count,
+                                                                                                          pjs->rids,
+                                                                                                          entry,
+                                                                                                          &restrict_method);
+                       else
+                               permit = pgpa_join_method_permits_join(pjs->outer_count,
+                                                                                                          pjs->inner_count,
+                                                                                                          pjs->rids,
+                                                                                                          entry,
+                                                                                                          &restrict_method);
+                       if (!permit)
+                               jo_deny_indexes = bms_add_member(jo_deny_indexes, i);
+                       else if (restrict_method)
+                       {
+                               jo_permit_indexes = bms_add_member(jo_permit_indexes, i);
+
+                               /*
+                                * Track join method advice in a set of its own. It must
+                                * not share storage with jo_permit_indexes: that set also
+                                * holds join order and semijoin advice, which must not be
+                                * flagged as conflicting merely because two join method
+                                * specifications disagree, and bms_add_member() may
+                                * reallocate the set it is given.
+                                */
+                               jm_indexes = bms_add_member(jm_indexes, i);
+                               if (join_mask != 0 && join_mask != my_join_mask)
+                                       jm_conflict = true;
+                               join_mask = my_join_mask;
+                       }
+                       continue;
+               }
+
+               /* Handle semijoin uniqueness advice. */
+               if (entry->tag == PGPA_TAG_SEMIJOIN_UNIQUE ||
+                       entry->tag == PGPA_TAG_SEMIJOIN_NON_UNIQUE)
+               {
+                       bool            advice_unique;
+                       bool            jt_unique;
+                       bool            jt_non_unique;
+                       bool            restrict_method;
+
+                       /* Advice wants to unique-ify and use a regular join? */
+                       advice_unique = (entry->tag == PGPA_TAG_SEMIJOIN_UNIQUE);
+
+                       /* Planner is trying to unique-ify and use a regular join? */
+                       jt_unique = (jointype == JOIN_UNIQUE_INNER ||
+                                                jointype == JOIN_UNIQUE_OUTER);
+
+                       /* Planner is trying a semi-join, without unique-ifying? */
+                       jt_non_unique = (jointype == JOIN_SEMI ||
+                                                        jointype == JOIN_RIGHT_SEMI);
+
+                       /*
+                        * These advice tags behave very much like join method advice, in
+                        * that they want the inner side of the semijoin to match the
+                        * relations listed in the advice. Hence, we test whether join
+                        * method advice would enforce a join order restriction here, and
+                        * disallow the join if not.
+                        *
+                        * XXX. Think harder about right semijoins.
+                        */
+                       if (!pgpa_join_method_permits_join(pjs->outer_count,
+                                                                                          pjs->inner_count,
+                                                                                          pjs->rids,
+                                                                                          entry,
+                                                                                          &restrict_method))
+                               jo_deny_indexes = bms_add_member(jo_deny_indexes, i);
+                       else if (restrict_method)
+                       {
+                               if (!jt_unique && !jt_non_unique)
+                               {
+                                       /*
+                                        * This doesn't seem to be a semijoin to which SJ_UNIQUE
+                                        * or SJ_NON_UNIQUE can be applied.
+                                        */
+                                       jo_permit_indexes = bms_add_member(jo_permit_indexes, i);
+                                       entry->flags |= PGPA_TE_INAPPLICABLE;
+                               }
+                               else if (advice_unique != jt_unique)
+                               {
+                                       /*
+                                        * The planner is trying the opposite semijoin strategy
+                                        * from the one advised. Record this entry as denying
+                                        * the join only; if it were also recorded as permitting
+                                        * it, the entry would conflict with itself and the
+                                        * join would never be rejected.
+                                        */
+                                       jo_deny_indexes = bms_add_member(jo_deny_indexes, i);
+                               }
+                               else
+                                       jo_permit_indexes = bms_add_member(jo_permit_indexes, i);
+                       }
+                       continue;
+               }
+       }
+
+       /*
+        * If the advice indicates both that this join order is permissible and
+        * also that it isn't, then mark advice related to the join order as
+        * conflicting.
+        */
+       if (jo_permit_indexes != NULL && jo_deny_indexes != NULL)
+       {
+               pgpa_trove_set_flags(pjs->join_entries, jo_permit_indexes,
+                                                        PGPA_TE_CONFLICTING);
+               pgpa_trove_set_flags(pjs->join_entries, jo_deny_indexes,
+                                                        PGPA_TE_CONFLICTING);
+       }
+
+       /*
+        * If more than one join method specification is relevant here and they
+        * differ, mark them all as conflicting.
+        */
+       if (jm_conflict)
+               pgpa_trove_set_flags(pjs->join_entries, jm_indexes,
+                                                        PGPA_TE_CONFLICTING);
+
+       /*
+        * If we were advised to deny this join order, then do so. However, if we
+        * were also advised to permit it, then do nothing, since the advice
+        * conflicts.
+        */
+       if (jo_deny_indexes != NULL && jo_permit_indexes == NULL)
+               *pgs_mask_p = 0;
+
+       /*
+        * If we were advised to restrict the join method, then do so. However, if
+        * we got conflicting join method advice or were also advised to reject
+        * this join order completely, then instead do nothing.
+        */
+       if (join_mask != 0 && !jm_conflict && jo_deny_indexes == NULL)
+               *pgs_mask_p = (*pgs_mask_p & ~PGS_JOIN_ANY) | join_mask;
+}
+
+/*
+ * Map an advice tag onto the matching path generation strategy mask.
+ *
+ * Callers are free to pass tags that have nothing to do with join
+ * strategies. For those we hand back 0, a value that cannot be mistaken for
+ * a valid mask.
+ */
+static uint64
+pgpa_join_strategy_mask_from_advice_tag(pgpa_advice_tag_type tag)
+{
+       uint64          strategy_mask = 0;
+
+       switch (tag)
+       {
+               case PGPA_TAG_FOREIGN_JOIN:
+                       strategy_mask = PGS_FOREIGNJOIN;
+                       break;
+               case PGPA_TAG_MERGE_JOIN_PLAIN:
+                       strategy_mask = PGS_MERGEJOIN_PLAIN;
+                       break;
+               case PGPA_TAG_MERGE_JOIN_MATERIALIZE:
+                       strategy_mask = PGS_MERGEJOIN_MATERIALIZE;
+                       break;
+               case PGPA_TAG_NESTED_LOOP_PLAIN:
+                       strategy_mask = PGS_NESTLOOP_PLAIN;
+                       break;
+               case PGPA_TAG_NESTED_LOOP_MATERIALIZE:
+                       strategy_mask = PGS_NESTLOOP_MATERIALIZE;
+                       break;
+               case PGPA_TAG_NESTED_LOOP_MEMOIZE:
+                       strategy_mask = PGS_NESTLOOP_MEMOIZE;
+                       break;
+               case PGPA_TAG_HASH_JOIN:
+                       strategy_mask = PGS_HASHJOIN;
+                       break;
+               default:
+                       /* Not a join strategy tag, so the mask stays zero. */
+                       break;
+       }
+
+       return strategy_mask;
+}
+
+/*
+ * Does a certain item of join order advice permit a certain join?
+ *
+ * The proposed join is described by rids: the first outer_count identifiers
+ * are treated as the outer side and the following inner_count identifiers as
+ * the inner side. Returns true if the JOIN_ORDER advice in entry allows that
+ * join and false if it does not.
+ *
+ * As a side effect, entry is always flagged PGPA_TE_MATCH_PARTIAL, and is
+ * additionally flagged PGPA_TE_MATCH_FULL when the outer side matches all but
+ * the last member of the selected join order list and the inner side matches
+ * that last member.
+ */
+static bool
+pgpa_join_order_permits_join(int outer_count, int inner_count,
+                                                        pgpa_identifier *rids,
+                                                        pgpa_trove_entry *entry)
+{
+       bool            loop = true;
+       bool            sublist = false;
+       int                     length;
+       int                     outer_length;
+       pgpa_advice_target *target = entry->target;
+       pgpa_advice_target *prefix_target;
+
+       /* We definitely have at least a partial match for this trove entry. */
+       entry->flags |= PGPA_TE_MATCH_PARTIAL;
+
+       /*
+        * Find the innermost sublist that contains all keys; if no sublist does,
+        * then continue processing with the toplevel list.
+        *
+        * For example, if the advice says JOIN_ORDER(t1 t2 (t3 t4 t5)), then we
+        * should evaluate joins that only involve t3, t4, and/or t5 against the
+        * (t3 t4 t5) sublist, and others against the full list.
+        *
+        * Note that (1) the outermost list is always ordered and (2) whenever we
+        * zoom into an unordered sublist, we instantly accept the proposed join.
+        * If the advice says JOIN_ORDER(t1 t2 {t3 t4 t5}), any approach to
+        * joining t3, t4, and/or t5 is acceptable.
+        */
+       while (loop)
+       {
+               Assert(target->ttype == PGPA_TARGET_ORDERED_LIST);
+
+               loop = false;
+               foreach_ptr(pgpa_advice_target, child_target, target->children)
+               {
+                       pgpa_itm_type itm;
+
+                       if (child_target->ttype == PGPA_TARGET_IDENTIFIER)
+                               continue;
+
+                       itm = pgpa_identifiers_match_target(outer_count + inner_count,
+                                                                                               rids, child_target);
+                       if (itm == PGPA_ITM_EQUAL || itm == PGPA_ITM_KEYS_ARE_SUBSET)
+                       {
+                               if (child_target->ttype == PGPA_TARGET_ORDERED_LIST)
+                               {
+                                       target = child_target;
+                                       sublist = true;
+                                       loop = true;
+                                       break;
+                               }
+                               else
+                               {
+                                       Assert(child_target->ttype == PGPA_TARGET_UNORDERED_LIST);
+                                       return true;
+                               }
+                       }
+               }
+       }
+
+       /*
+        * Try to find a prefix of the selected join order list that is exactly
+        * equal to the outer side of the proposed join.
+        */
+       length = list_length(target->children);
+       prefix_target = palloc0_object(pgpa_advice_target);
+       prefix_target->ttype = PGPA_TARGET_ORDERED_LIST;
+       for (outer_length = 1; outer_length <= length; ++outer_length)
+       {
+               pgpa_itm_type itm;
+
+               /* Avoid leaking memory in every loop iteration. */
+               if (prefix_target->children != NULL)
+                       list_free(prefix_target->children);
+               prefix_target->children = list_copy_head(target->children,
+                                                                                                outer_length);
+
+               /* Search, hoping to find an exact match. */
+               itm = pgpa_identifiers_match_target(outer_count, rids, prefix_target);
+               if (itm == PGPA_ITM_EQUAL)
+                       break;
+
+               /*
+                * If the prefix of the join order list that we're considering
+                * includes some but not all of the outer rels, we can make the prefix
+                * longer to find an exact match. But if the advice hasn't mentioned
+                * everything that's part of our outer rel yet, but has mentioned
+                * things that are not, then this join doesn't match the join order
+                * list.
+                */
+               if (itm != PGPA_ITM_TARGETS_ARE_SUBSET)
+                       return false;
+       }
+
+       /*
+        * If the previous loop stopped before the prefix_target included the
+        * entire join order list, then the next member of the join order list
+        * must exactly match the inner side of the join.
+        *
+        * Example: Given JOIN_ORDER(t1 t2 (t3 t4 t5)), if the outer side of the
+        * current join includes only t1, then the inner side must be exactly t2;
+        * if the outer side includes both t1 and t2, then the inner side must
+        * include exactly t3, t4, and t5.
+        */
+       if (outer_length < length)
+       {
+               pgpa_advice_target *inner_target;
+               pgpa_itm_type itm;
+
+               inner_target = list_nth(target->children, outer_length);
+
+               itm = pgpa_identifiers_match_target(inner_count, rids + outer_count,
+                                                                                       inner_target);
+
+               /*
+                * Before returning, consider whether we need to mark this entry as
+                * fully matched. If we found every item but one on the lefthand side
+                * of the join and the last item on the righthand side of the join,
+                * then the answer is yes.
+                */
+               if (outer_length + 1 == length && itm == PGPA_ITM_EQUAL)
+                       entry->flags |= PGPA_TE_MATCH_FULL;
+
+               return (itm == PGPA_ITM_EQUAL);
+       }
+
+       /*
+        * If we get here, then the outer side of the join includes the entirety
+        * of the join order list. In this case, we behave differently depending
+        * on whether we're looking at the top-level join order list or sublist.
+        * At the top-level, we treat the specified list as mandating that the
+        * actual join order has the given list as a prefix, but a sublist
+        * requires an exact match.
+        *
+        * Example: Given JOIN_ORDER(t1 t2 (t3 t4 t5)), we must start by joining
+        * all five of those relations and in that sequence, but once that is
+        * done, it's OK to join any other rels that are part of the join problem.
+        * This allows a user to specify the driving table and perhaps the first
+        * few things to which it should be joined while leaving the rest of the
+        * join order up to the optimizer. But it seems like it would be
+        * surprising, given that specification, if the user could add t6 to the
+        * (t3 t4 t5) sub-join, so we don't allow that. If we did want to allow
+        * it, the logic earlier in this function would require substantial
+        * adjustment: we could allow the t3-t4-t5-t6 join to be built here, but
+        * the next step of joining t1-t2 to the result would still be rejected.
+        */
+       return !sublist;
+}
+
+/*
+ * Does a certain item of join method advice permit a certain join?
+ *
+ * Advice such as HASH_JOIN((x y)) means that there should be a hash join with
+ * exactly x and y on the inner side. Obviously, this means that if we are
+ * considering a join with exactly x and y on the inner side, we should enforce
+ * the use of a hash join. However, it also means that we must reject some
+ * incompatible join orders entirely.  For example, a join with exactly x
+ * and y on the outer side shouldn't be allowed, because such paths might win
+ * over the advice-driven path on cost.
+ *
+ * To accommodate these requirements, this function returns true if the join
+ * should be allowed and false if it should not. Furthermore, *restrict_method
+ * is set to true if the join method should be enforced and false if not.
+ *
+ * The proposed join is described by rids: the first outer_count identifiers
+ * are treated as the outer side and the following inner_count identifiers as
+ * the inner side. As a side effect, entry is always flagged
+ * PGPA_TE_MATCH_PARTIAL, and also PGPA_TE_MATCH_FULL when the inner side
+ * exactly matches the advice target.
+ */
+static bool
+pgpa_join_method_permits_join(int outer_count, int inner_count,
+                                                         pgpa_identifier *rids,
+                                                         pgpa_trove_entry *entry,
+                                                         bool *restrict_method)
+{
+       pgpa_advice_target *target = entry->target;
+       pgpa_itm_type inner_itm;
+       pgpa_itm_type outer_itm;
+       pgpa_itm_type join_itm;
+
+       /* We definitely have at least a partial match for this trove entry. */
+       entry->flags |= PGPA_TE_MATCH_PARTIAL;
+
+       *restrict_method = false;
+
+       /*
+        * If our inner rel mentions exactly the same relations as the advice
+        * target, allow the join and enforce the join method restriction.
+        *
+        * If our inner rel mentions a superset of the target relations, allow the
+        * join. The join we care about has already taken place, and this advice
+        * imposes no further restrictions.
+        */
+       inner_itm = pgpa_identifiers_match_target(inner_count,
+                                                                                         rids + outer_count,
+                                                                                         target);
+       if (inner_itm == PGPA_ITM_EQUAL)
+       {
+               entry->flags |= PGPA_TE_MATCH_FULL;
+               *restrict_method = true;
+               return true;
+       }
+       else if (inner_itm == PGPA_ITM_TARGETS_ARE_SUBSET)
+               return true;
+
+       /*
+        * If our outer rel mentions a superset of the relations in the advice
+        * target, no restrictions apply. The join we care about has already taken
+        * place, and this advice imposes no further restrictions.
+        *
+        * On the other hand, if our outer rel mentions exactly the relations
+        * mentioned in the advice target, the planner is trying to reverse the
+        * sides of the join as compared with our desired outcome. Reject that.
+        */
+       outer_itm = pgpa_identifiers_match_target(outer_count,
+                                                                                         rids, target);
+       if (outer_itm == PGPA_ITM_TARGETS_ARE_SUBSET)
+               return true;
+       else if (outer_itm == PGPA_ITM_EQUAL)
+               return false;
+
+       /*
+        * If the advice target mentions only a single relation, the test below
+        * cannot ever pass, so save some work by exiting now.
+        */
+       if (target->ttype == PGPA_TARGET_IDENTIFIER)
+               return false;
+
+       /*
+        * If everything in the joinrel appears in the advice target, we're below
+        * the level of the join we want to control.
+        *
+        * For example, HASH_JOIN((x y)) doesn't restrict how x and y can be
+        * joined.
+        *
+        * This lookup shouldn't return PGPA_ITM_DISJOINT, because any such advice
+        * should not have been returned from the trove in the first place.
+        */
+       join_itm = pgpa_identifiers_match_target(outer_count + inner_count,
+                                                                                        rids, target);
+       Assert(join_itm != PGPA_ITM_DISJOINT);
+       if (join_itm == PGPA_ITM_KEYS_ARE_SUBSET ||
+               join_itm == PGPA_ITM_EQUAL)
+               return true;
+
+       /*
+        * We've already permitted all allowable cases, so reject this.
+        *
+        * If we reach this point, then the advice overlaps with this join but
+        * isn't entirely contained within either side, and there's also at least
+        * one relation present in the join that isn't mentioned by the advice.
+        *
+        * For instance, in the HASH_JOIN((x y)) example, we would reach here if x
+        * were on one side of the join, y on the other, and at least one of the
+        * two sides also included some other relation, say t. In that case,
+        * accepting this join would allow the (x y t) joinrel to contain
+        * non-disabled paths that do not put (x y) on the inner side of a hash
+        * join; we could instead end up with something like (x JOIN t) JOIN y.
+        */
+       return false;
+}
+
+/*
+ * Decide whether advice about an opaque join allows a proposed join.
+ *
+ * An opaque join is one whose internal mechanics are invisible to
+ * PostgreSQL. Foreign joins are presently the only example:
+ * FOREIGN_JOIN((x y z)) says that x, y, and z get joined on the remote side,
+ * without telling us anything about the join order or join methods used over
+ * there.
+ *
+ * Returns true if the join should be allowed. *restrict_method is set to
+ * true only when the joinrel exactly matches the advice target, and to false
+ * otherwise.
+ */
+static bool
+pgpa_opaque_join_permits_join(int outer_count, int inner_count,
+                                                         pgpa_identifier *rids,
+                                                         pgpa_trove_entry *entry,
+                                                         bool *restrict_method)
+{
+       bool            permitted;
+
+       /* Any trove entry that reaches us is at least partially matched. */
+       entry->flags |= PGPA_TE_MATCH_PARTIAL;
+
+       /* No method restriction unless we find an exact match below. */
+       *restrict_method = false;
+
+       switch (pgpa_identifiers_match_target(outer_count + inner_count,
+                                                                                 rids, entry->target))
+       {
+               case PGPA_ITM_EQUAL:
+
+                       /*
+                        * The joinrel is precisely the advised set of relations, so
+                        * allow the join and insist on the relevant opaque join method.
+                        */
+                       entry->flags |= PGPA_TE_MATCH_FULL;
+                       *restrict_method = true;
+                       permitted = true;
+                       break;
+
+               case PGPA_ITM_KEYS_ARE_SUBSET:
+               case PGPA_ITM_TARGETS_ARE_SUBSET:
+
+                       /*
+                        * With PGPA_ITM_TARGETS_ARE_SUBSET, the join we care about has
+                        * already happened and nothing further is restricted.
+                        *
+                        * With PGPA_ITM_KEYS_ARE_SUBSET, we are still working up to the
+                        * join we care about and have not yet pulled in any relation
+                        * the advice doesn't name. ForeignScan paths for joins are built
+                        * up from ForeignScan paths for the underlying joins and scans,
+                        * so this join must stay enabled while we are considering only
+                        * a subset of the relations we ultimately want.
+                        */
+                       permitted = true;
+                       break;
+
+               default:
+
+                       /*
+                        * The advice overlaps the join, but the join also contains at
+                        * least one relation the advice doesn't mention. Disable such
+                        * paths so that the join really is pushed down as intended.
+                        */
+                       permitted = false;
+                       break;
+       }
+
+       return permitted;
+}
+
+/*
+ * Apply scan advice to a RelOptInfo.
+ *
+ * 'scan_entries' and 'rel_entries' are arrays of trove entries; only the
+ * array positions that are members of 'scan_indexes' and 'rel_indexes',
+ * respectively, are examined. The former supply scan-method advice, and the
+ * latter supply Gather-related and PARTITIONWISE advice.
+ *
+ * On return, the flags of the examined entries have been updated to reflect
+ * matching, conflicts, and inapplicability, and rel->pgs_mask (and possibly
+ * rel->indexlist) have been adjusted to enforce any non-conflicting advice.
+ *
+ * XXX. For bitmap heap scans, we're just ignoring the index information from
+ * the advice. That's not cool.
+ */
+static void
+pgpa_planner_apply_scan_advice(RelOptInfo *rel,
+                                                          pgpa_trove_entry *scan_entries,
+                                                          Bitmapset *scan_indexes,
+                                                          pgpa_trove_entry *rel_entries,
+                                                          Bitmapset *rel_indexes)
+{
+       bool            gather_conflict = false;
+       Bitmapset  *gather_partial_match = NULL;
+       Bitmapset  *gather_full_match = NULL;
+       int                     i = -1;
+       pgpa_trove_entry *scan_entry = NULL;
+       int                     flags;
+       bool            scan_type_conflict = false;
+       Bitmapset  *scan_type_indexes = NULL;
+       Bitmapset  *scan_type_rel_indexes = NULL;
+       uint64          gather_mask = 0;
+       uint64          scan_type = 0;
+
+       /* Scrutinize available scan advice. */
+       while ((i = bms_next_member(scan_indexes, i)) >= 0)
+       {
+               pgpa_trove_entry *my_entry = &scan_entries[i];
+               uint64          my_scan_type = 0;
+
+               /* Translate our advice tags to a scan strategy advice value. */
+               if (my_entry->tag == PGPA_TAG_BITMAP_HEAP_SCAN)
+                       my_scan_type = PGS_BITMAPSCAN;
+               else if (my_entry->tag == PGPA_TAG_INDEX_ONLY_SCAN)
+                       my_scan_type = PGS_INDEXONLYSCAN | PGS_CONSIDER_INDEXONLY;
+               else if (my_entry->tag == PGPA_TAG_INDEX_SCAN)
+                       my_scan_type = PGS_INDEXSCAN;
+               else if (my_entry->tag == PGPA_TAG_SEQ_SCAN)
+                       my_scan_type = PGS_SEQSCAN;
+               else if (my_entry->tag == PGPA_TAG_TID_SCAN)
+                       my_scan_type = PGS_TIDSCAN;
+
+               /*
+                * If this is understandable scan advice, hang on to the entry, the
+                * inferred scan type, and the index at which we found it.
+                *
+                * Also make a note if we see conflicting scan type advice. Note that
+                * we regard two index specifications as conflicting unless they match
+                * exactly. In theory, perhaps we could regard INDEX_SCAN(a c) and
+                * INDEX_SCAN(a b.c) as non-conflicting if it happens that the only
+                * index named c is in schema b, but it doesn't seem worth the code.
+                */
+               if (my_scan_type != 0)
+               {
+                       if (scan_type != 0 && scan_type != my_scan_type)
+                               scan_type_conflict = true;
+                       if (!scan_type_conflict && scan_entry != NULL &&
+                               my_entry->target->itarget != NULL &&
+                               scan_entry->target->itarget != NULL &&
+                               !pgpa_index_targets_equal(scan_entry->target->itarget,
+                                                                                 my_entry->target->itarget))
+                               scan_type_conflict = true;
+                       scan_entry = my_entry;
+                       scan_type = my_scan_type;
+                       scan_type_indexes = bms_add_member(scan_type_indexes, i);
+               }
+       }
+
+       /* Scrutinize available gather-related and partitionwise advice. */
+       i = -1;
+       while ((i = bms_next_member(rel_indexes, i)) >= 0)
+       {
+               pgpa_trove_entry *my_entry = &rel_entries[i];
+               uint64          my_gather_mask = 0;
+               bool            just_one_rel;
+
+               just_one_rel = my_entry->target->ttype == PGPA_TARGET_IDENTIFIER
+                       || list_length(my_entry->target->children) == 1;
+
+               /*
+                * PARTITIONWISE behaves like a scan type, except that if there's more
+                * than one relation targeted, it has no effect at this level.
+                */
+               if (my_entry->tag == PGPA_TAG_PARTITIONWISE)
+               {
+                       if (just_one_rel)
+                       {
+                               const uint64 my_scan_type = PGS_APPEND | PGS_MERGE_APPEND;
+
+                               if (scan_type != 0 && scan_type != my_scan_type)
+                                       scan_type_conflict = true;
+                               scan_entry = my_entry;
+                               scan_type = my_scan_type;
+                               scan_type_rel_indexes =
+                                       bms_add_member(scan_type_rel_indexes, i);
+                       }
+                       continue;
+               }
+
+               /*
+                * GATHER and GATHER_MERGE applied to a single rel mean that we should
+                * use the corresponding strategy here, while applying either to more
+                * than one rel means we should not use those strategies here, but
+                * rather at the level of the joinrel that corresponds to what was
+                * specified. NO_GATHER can only be applied to single rels.
+                *
+                * Note that setting PGS_CONSIDER_NONPARTIAL in my_gather_mask is
+                * equivalent to allowing the non-use of either form of Gather here.
+                */
+               if (my_entry->tag == PGPA_TAG_GATHER ||
+                       my_entry->tag == PGPA_TAG_GATHER_MERGE)
+               {
+                       if (!just_one_rel)
+                               my_gather_mask = PGS_CONSIDER_NONPARTIAL;
+                       else if (my_entry->tag == PGPA_TAG_GATHER)
+                               my_gather_mask = PGS_GATHER;
+                       else
+                               my_gather_mask = PGS_GATHER_MERGE;
+               }
+               else if (my_entry->tag == PGPA_TAG_NO_GATHER)
+               {
+                       Assert(just_one_rel);
+                       my_gather_mask = PGS_CONSIDER_NONPARTIAL;
+               }
+
+               /*
+                * If we set my_gather_mask up above, then we (1) make a note if the
+                * advice conflicted, (2) remember the mask value, and (3) remember
+                * whether this was a full or partial match.
+                */
+               if (my_gather_mask != 0)
+               {
+                       if (gather_mask != 0 && gather_mask != my_gather_mask)
+                               gather_conflict = true;
+                       gather_mask = my_gather_mask;
+                       if (just_one_rel)
+                               gather_full_match = bms_add_member(gather_full_match, i);
+                       else
+                               gather_partial_match = bms_add_member(gather_partial_match, i);
+               }
+       }
+
+       /* Enforce choice of index. */
+       if (scan_entry != NULL && !scan_type_conflict &&
+               (scan_entry->tag == PGPA_TAG_INDEX_SCAN ||
+                scan_entry->tag == PGPA_TAG_INDEX_ONLY_SCAN))
+       {
+               pgpa_index_target *itarget = scan_entry->target->itarget;
+               IndexOptInfo *matched_index = NULL;
+
+               Assert(itarget->itype == PGPA_INDEX_NAME);
+
+               foreach_node(IndexOptInfo, index, rel->indexlist)
+               {
+                       char       *relname = get_rel_name(index->indexoid);
+
+                       /*
+                        * get_rel_name() returns NULL when the OID has no catalog
+                        * entry, as can happen for a hypothetical index injected by a
+                        * plugin. Such an index cannot be matched by name, and handing
+                        * NULL to strcmp() would crash, so just skip it.
+                        */
+                       if (relname == NULL ||
+                               strcmp(itarget->indname, relname) != 0)
+                               continue;
+
+                       /*
+                        * If the advice is schema-qualified, the schema must match
+                        * too. We only look the schema up when we actually need it.
+                        */
+                       if (itarget->indnamespace != NULL)
+                       {
+                               Oid                     nspoid = get_rel_namespace(index->indexoid);
+                               char       *relnamespace = get_namespace_name(nspoid);
+
+                               if (relnamespace == NULL ||
+                                       strcmp(itarget->indnamespace, relnamespace) != 0)
+                                       continue;
+                       }
+
+                       matched_index = index;
+                       break;
+               }
+
+               if (matched_index == NULL)
+               {
+                       /* Don't force the scan type if the index doesn't exist. */
+                       scan_type = 0;
+
+                       /* Mark advice as inapplicable. */
+                       pgpa_trove_set_flags(scan_entries, scan_type_indexes,
+                                                                PGPA_TE_INAPPLICABLE);
+               }
+               else
+               {
+                       /* Retain this index and discard the rest. */
+                       rel->indexlist = list_make1(matched_index);
+               }
+       }
+
+       /*
+        * Mark all the scan method entries as fully matched; and if they specify
+        * different things, mark them all as conflicting.
+        */
+       flags = PGPA_TE_MATCH_PARTIAL | PGPA_TE_MATCH_FULL;
+       if (scan_type_conflict)
+               flags |= PGPA_TE_CONFLICTING;
+       pgpa_trove_set_flags(scan_entries, scan_type_indexes, flags);
+       pgpa_trove_set_flags(rel_entries, scan_type_rel_indexes, flags);
+
+       /*
+        * Mark every Gather-related piece of advice as partially matched. Mark
+        * the ones that included this relation as a target by itself as fully
+        * matched. If there was a conflict, mark them all as conflicting.
+        */
+       flags = PGPA_TE_MATCH_PARTIAL;
+       if (gather_conflict)
+               flags |= PGPA_TE_CONFLICTING;
+       pgpa_trove_set_flags(rel_entries, gather_partial_match, flags);
+       flags |= PGPA_TE_MATCH_FULL;
+       pgpa_trove_set_flags(rel_entries, gather_full_match, flags);
+
+       /* If there is a non-conflicting scan specification, enforce it. */
+       if (scan_type != 0 && !scan_type_conflict)
+       {
+               rel->pgs_mask &=
+                       ~(PGS_SCAN_ANY | PGS_APPEND | PGS_MERGE_APPEND |
+                         PGS_CONSIDER_INDEXONLY);
+               rel->pgs_mask |= scan_type;
+       }
+
+       /* If there is a non-conflicting gather specification, enforce it. */
+       if (gather_mask != 0 && !gather_conflict)
+       {
+               rel->pgs_mask &=
+                       ~(PGS_GATHER | PGS_GATHER_MERGE | PGS_CONSIDER_NONPARTIAL);
+               rel->pgs_mask |= gather_mask;
+       }
+}
+
+/*
+ * Add feedback entries for one trove slice to the provided list and
+ * return the resulting list.
+ *
+ * One DefElem is appended per trove entry of the given lookup type; its name
+ * is the string form of the entry and its argument is an Integer node holding
+ * the entry's flags.
+ *
+ * Feedback entries are generated from the trove entry's flags. It's assumed
+ * that the caller has already set all relevant flags with the exception of
+ * PGPA_TE_FAILED. We set that flag here if appropriate.
+ */
+static List *
+pgpa_planner_append_feedback(List *list, pgpa_trove *trove,
+                                                        pgpa_trove_lookup_type type,
+                                                        pgpa_identifier *rt_identifiers,
+                                                        pgpa_plan_walker_context *walker)
+{
+       pgpa_trove_entry *entries;
+       int                     nentries;
+
+       pgpa_trove_lookup_all(trove, type, &entries, &nentries);
+       for (int i = 0; i < nentries; ++i)
+       {
+               pgpa_trove_entry *entry = &entries[i];
+               DefElem    *item;
+
+               /*
+                * If this entry was fully matched, check whether generating advice
+                * from this plan would produce such an entry. If not, label the entry
+                * as failed.
+                */
+               if ((entry->flags & PGPA_TE_MATCH_FULL) != 0 &&
+                       !pgpa_walker_would_advise(walker, rt_identifiers,
+                                                                         entry->tag, entry->target))
+                       entry->flags |= PGPA_TE_FAILED;
+
+               item = makeDefElem(pgpa_cstring_trove_entry(entry),
+                                                  (Node *) makeInteger(entry->flags), -1);
+               list = lappend(list, item);
+       }
+
+       return list;
+}
+
+#ifdef USE_ASSERT_CHECKING
+
+/*
+ * Hash a key made up of an RTI plus an optional plan name, using the
+ * fasthash primitives.
+ */
+static uint32
+pgpa_ri_checker_hash_key(pgpa_ri_checker_key key)
+{
+       fasthash_state state;
+       int                     name_len = 0;
+
+       fasthash_init(&state, 0);
+
+       /* Mix in the RTI first. */
+       state.accum = key.rti;
+       fasthash_combine(&state);
+
+       /* Then the plan name, if any; a NULL name contributes length zero. */
+       if (key.plan_name != NULL)
+               name_len = fasthash_accum_cstring(&state, key.plan_name);
+
+       /* hashfn_unstable.h recommends using string length as tweak */
+       return fasthash_final32(&state, name_len);
+}
+
+#endif
+
+/*
+ * Remember the range table identifier computed for one relation during
+ * planning, so that it can be cross-checked later.
+ *
+ * This does nothing unless assertions are enabled.
+ */
+static void
+pgpa_ri_checker_save(pgpa_planner_state *pps, PlannerInfo *root,
+                                        RelOptInfo *rel)
+{
+#ifdef USE_ASSERT_CHECKING
+       pgpa_ri_checker_key lookup_key;
+       pgpa_ri_checker *slot;
+       pgpa_identifier ident;
+       const char *ident_string;
+       bool            already_present;
+
+       /* The hash key is the rel's single RTI plus the current plan name. */
+       lookup_key.rti = bms_singleton_member(rel->relids);
+       lookup_key.plan_name = root->plan_name;
+
+       /* Compute the identifier and render it as a string. */
+       pgpa_compute_identifier_by_rti(root, lookup_key.rti, &ident);
+       ident_string = pgpa_identifier_string(&ident);
+
+       /* A pre-existing entry for this key must carry the same string. */
+       slot = pgpa_ri_check_insert(pps->ri_check_hash, lookup_key,
+                                                               &already_present);
+       Assert(!already_present ||
+                  strcmp(slot->rid_string, ident_string) == 0);
+       slot->rid_string = ident_string;
+#endif
+}
+
+/*
+ * Cross-check the range table identifiers saved during planning against the
+ * ones derived from the finished plan.
+ *
+ * This does nothing unless assertions are enabled.
+ */
+static void
+pgpa_ri_checker_validate(pgpa_planner_state *pps, PlannedStmt *pstmt)
+{
+#ifdef USE_ASSERT_CHECKING
+       pgpa_identifier *final_rids;
+       pgpa_ri_check_iterator iter;
+
+       /* Build identifiers from the planned statement. */
+       final_rids = pgpa_create_identifiers_for_planned_stmt(pstmt);
+
+       /* Walk the identifiers recorded during planning and compare each one. */
+       pgpa_ri_check_start_iterate(pps->ri_check_hash, &iter);
+       for (;;)
+       {
+               pgpa_ri_checker *saved;
+               int                     offset = 0;
+               const char *final_string;
+               Index           final_rti;
+
+               saved = pgpa_ri_check_iterate(pps->ri_check_hash, &iter);
+               if (saved == NULL)
+                       break;
+
+               /*
+                * An entry without a plan name has an rtoffset of 0. For any other
+                * entry, look the rtoffset up in the SubPlanRTInfo list.
+                */
+               if (saved->key.plan_name != NULL)
+               {
+                       foreach_node(SubPlanRTInfo, rtinfo, pstmt->subrtinfos)
+                       {
+                               /*
+                                * When rtinfo->dummy is set, the subquery's range table
+                                * was only partially copied into the final range table:
+                                * per add_rtes_to_flat_rtable, just the RTE_RELATION
+                                * entries and the RTE_SUBQUERY entries that used to be
+                                * RTE_RELATION entries. So no single rtoffset maps
+                                * planning-time RTIs onto the corresponding relations in
+                                * the final rtable.
+                                *
+                                * More elaborate logic could cope with that: remember the
+                                * subquery's entire rtable at planning time, work out
+                                * which entries would have been copied, and pair them up.
+                                * That doesn't look worthwhile for now, since RTIs from
+                                * such subqueries never show up in the plan tree itself,
+                                * only in the range table, so we can neither generate nor
+                                * accept advice for them.
+                                */
+                               if (strcmp(saved->key.plan_name, rtinfo->plan_name) != 0
+                                       || rtinfo->dummy)
+                                       continue;
+
+                               offset = rtinfo->rtoffset;
+                               Assert(offset > 0);
+                               break;
+                       }
+
+                       /*
+                        * Failing to find the plan name is not an error. It only means
+                        * a subplan by this name was planned but turned out to be a
+                        * dummy subplan, and so is absent from the final plan tree.
+                        */
+                       if (offset == 0)
+                               continue;
+               }
+
+               /*
+                * saved->key.rti was observed before range-table flattening, so the
+                * applicable RT offset must be added to obtain the final RTI.
+                */
+               final_rti = saved->key.rti + offset;
+               Assert(final_rti <= list_length(pstmt->rtable));
+
+               /* The string computed now must equal the one saved earlier. */
+               final_string = pgpa_identifier_string(&final_rids[final_rti - 1]);
+               Assert(strcmp(final_string, saved->rid_string) == 0);
+       }
+#endif
+}
diff --git a/contrib/pg_plan_advice/pgpa_planner.h b/contrib/pg_plan_advice/pgpa_planner.h

new file mode 100644 (file)

index 0000000..7d40b91
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_planner.h
@@ -0,0 +1,17 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_planner.h
+ *       planner hooks
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_planner.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_PLANNER_H
+#define PGPA_PLANNER_H
+
+extern void pgpa_planner_install_hooks(void);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_scan.c b/contrib/pg_plan_advice/pgpa_scan.c

new file mode 100644 (file)

index 0000000..dbd7c99
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_scan.c
@@ -0,0 +1,278 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_scan.c
+ *       analysis of scans in Plan trees
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_scan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pgpa_scan.h"
+#include "pgpa_walker.h"
+
+#include "nodes/parsenodes.h"
+#include "parser/parsetree.h"
+
+static pgpa_scan *pgpa_make_scan(pgpa_plan_walker_context *walker, Plan *plan,
+                                                                pgpa_scan_strategy strategy,
+                                                                Bitmapset *relids,
+                                                                bool beneath_any_gather);
+
+
+static Bitmapset *filter_out_join_relids(Bitmapset *relids, List *rtable);
+static RTEKind unique_nonjoin_rtekind(Bitmapset *relids, List *rtable);
+
+/*
+ * Build a pgpa_scan object for a Plan node and update the plan walker
+ * context as appropriate.  If this is an Append or MergeAppend scan, also
+ * build a pgpa_scan for each scan that was consolidated into this one by
+ * Append/MergeAppend pull-up.
+ *
+ * If there is at least one ElidedNode for this plan node, pass the uppermost
+ * one as elided_node, else pass NULL.
+ *
+ * Set the 'beneath_any_gather' flag if we are underneath a Gather or
+ * Gather Merge node.
+ *
+ * Set the 'within_join_problem' flag if we're inside of a join problem and
+ * not otherwise.
+ *
+ * Returns the new pgpa_scan, or NULL if the plan node has no associated RTIs
+ * (which raises an error instead when 'within_join_problem' is set).
+ */
+pgpa_scan *
+pgpa_build_scan(pgpa_plan_walker_context *walker, Plan *plan,
+                               ElidedNode *elided_node,
+                               bool beneath_any_gather, bool within_join_problem)
+{
+       pgpa_scan_strategy strategy = PGPA_SCAN_ORDINARY;
+       Bitmapset  *relids = NULL;
+       int                     rti = -1;
+       List       *child_append_relid_sets = NIL;
+
+       if (elided_node != NULL)
+       {
+               NodeTag         elided_type = elided_node->elided_type;
+
+               /*
+                * When setrefs processing elides an Append or MergeAppend node that
+                * had a single surviving child, what remains is a partitionwise
+                * "scan". It might really be a partitionwise join, but we have no
+                * need to tell the two apart.
+                *
+                * When the elided node is instead a trivial SubqueryScan, this is an
+                * "ordinary" scan i.e. one for which we need to generate advice
+                * because the planner has not made any meaningful choice.
+                */
+               strategy = (elided_type == T_Append || elided_type == T_MergeAppend)
+                       ? PGPA_SCAN_PARTITIONWISE
+                       : PGPA_SCAN_ORDINARY;
+
+               /* Join RTIs can be present, but advice never refers to them. */
+               relids = filter_out_join_relids(elided_node->relids,
+                                                                               walker->pstmt->rtable);
+       }
+       else if ((rti = pgpa_scanrelid(plan)) != 0)
+       {
+               relids = bms_make_singleton(rti);
+
+               switch (nodeTag(plan))
+               {
+                       case T_SeqScan:
+                               strategy = PGPA_SCAN_SEQ;
+                               break;
+                       case T_BitmapHeapScan:
+                               strategy = PGPA_SCAN_BITMAP_HEAP;
+                               break;
+                       case T_IndexScan:
+                               strategy = PGPA_SCAN_INDEX;
+                               break;
+                       case T_IndexOnlyScan:
+                               strategy = PGPA_SCAN_INDEX_ONLY;
+                               break;
+                       case T_TidScan:
+                       case T_TidRangeScan:
+                               strategy = PGPA_SCAN_TID;
+                               break;
+                       default:
+
+                               /*
+                                * Everything else stays PGPA_SCAN_ORDINARY. That includes
+                                * a ForeignScan targeting a single relation, where no
+                                * other strategy is possible; the multi-relation case is
+                                * handled differently below.
+                                */
+                               break;
+               }
+       }
+       else if ((relids = pgpa_relids(plan)) != NULL)
+       {
+               switch (nodeTag(plan))
+               {
+                       case T_ForeignScan:
+
+                               /*
+                                * A single foreign scan covering multiple relations means
+                                * the join was pushed to the remote side, and the
+                                * generated advice should say so.
+                                */
+                               strategy = PGPA_SCAN_FOREIGN;
+                               break;
+                       case T_Append:
+                       case T_MergeAppend:
+
+                               /*
+                                * Append and MergeAppend nodes can represent partitionwise
+                                * scans of a relation, but when they implement a set
+                                * operation, they are just ordinary scans.
+                                */
+                               if (unique_nonjoin_rtekind(relids, walker->pstmt->rtable)
+                                       == RTE_RELATION)
+                                       strategy = PGPA_SCAN_PARTITIONWISE;
+
+                               if (nodeTag(plan) == T_Append)
+                                       child_append_relid_sets =
+                                               ((Append *) plan)->child_append_relid_sets;
+                               else
+                                       child_append_relid_sets =
+                                               ((MergeAppend *) plan)->child_append_relid_sets;
+                               break;
+                       default:
+                               /* Anything else is an ordinary scan. */
+                               break;
+               }
+
+               /* Join RTIs can be present, but advice never refers to them. */
+               relids = filter_out_join_relids(relids, walker->pstmt->rtable);
+       }
+
+       /*
+        * For an Append or MergeAppend node that absorbed subordinate Append or
+        * MergeAppend paths, every absorbed path is effectively one more scan
+        * that we must account for.
+        */
+       foreach_node(Bitmapset, child_relids, child_append_relid_sets)
+       {
+               Bitmapset  *child_nonjoin_relids =
+                       filter_out_join_relids(child_relids, walker->pstmt->rtable);
+
+               (void) pgpa_make_scan(walker, plan, strategy,
+                                                         child_nonjoin_relids,
+                                                         beneath_any_gather);
+       }
+
+       /* With RTIs in hand, this is a scan: build and return it. */
+       if (relids != NULL)
+               return pgpa_make_scan(walker, plan, strategy, relids,
+                                                         beneath_any_gather);
+
+       /*
+        * A plan node with no associated RTIs is not a scan. That's unexpected
+        * when the 'within_join_problem' flag is set, so throw an error in that
+        * case, else return quietly.
+        */
+       if (within_join_problem)
+               elog(ERROR, "plan node has no RTIs: %d", (int) nodeTag(plan));
+       return NULL;
+}
+
+/*
+ * Allocate one pgpa_scan object, fill it in, and record it in the
+ * pgpa_plan_walker_context. Returns the new object.
+ */
+static pgpa_scan *
+pgpa_make_scan(pgpa_plan_walker_context *walker, Plan *plan,
+                          pgpa_scan_strategy strategy, Bitmapset *relids,
+                          bool beneath_any_gather)
+{
+       pgpa_scan  *result = palloc(sizeof(pgpa_scan));
+
+       /* Populate the new scan object. */
+       result->plan = plan;
+       result->strategy = strategy;
+       result->relids = relids;
+       result->beneath_any_gather = beneath_any_gather;
+
+       /* File it under the list for its strategy. */
+       walker->scans[strategy] = lappend(walker->scans[strategy], result);
+
+       /*
+        * Every scan that is not beneath a Gather or Gather Merge node should
+        * get NO_GATHER() advice. Partitionwise scans are the exception, since
+        * emitting NO_GATHER() for their child scans is sufficient.
+        */
+       if (beneath_any_gather || strategy == PGPA_SCAN_PARTITIONWISE)
+               return result;
+
+       walker->no_gather_scans = bms_add_members(walker->no_gather_scans,
+                                                                                         relids);
+
+       return result;
+}
+
+/*
+ * Determine the unique rtekind of a set of relids.
+ *
+ * RTE_JOIN members of 'relids' are ignored. All remaining members must share
+ * the same rtekind, which is returned; an error is raised if they differ or
+ * if no non-join member exists. 'relids' must not be NULL.
+ */
+static RTEKind
+unique_nonjoin_rtekind(Bitmapset *relids, List *rtable)
+{
+       int                     rti = -1;
+       bool            first = true;
+       RTEKind         rtekind = RTE_RELATION; /* overwritten before any use */
+
+       Assert(relids != NULL);
+
+       while ((rti = bms_next_member(relids, rti)) >= 0)
+       {
+               RangeTblEntry *rte = rt_fetch(rti, rtable);
+
+               if (rte->rtekind == RTE_JOIN)
+                       continue;
+
+               if (first)
+               {
+                       rtekind = rte->rtekind;
+                       first = false;
+               }
+               else if (rtekind != rte->rtekind)
+               {
+                       /* Cast the enums explicitly, since %d expects an int. */
+                       elog(ERROR, "rtekind mismatch: %d vs. %d",
+                                (int) rtekind, (int) rte->rtekind);
+               }
+       }
+
+       if (first)
+               elog(ERROR, "no non-RTE_JOIN RTEs found");
+
+       return rtekind;
+}
+
+/*
+ * Return a freshly built Bitmapset holding every member of 'relids' whose
+ * range table entry is not an RTE_JOIN. The input set is left untouched.
+ */
+static Bitmapset *
+filter_out_join_relids(Bitmapset *relids, List *rtable)
+{
+       Bitmapset  *nonjoin_relids = NULL;
+       int                     member;
+
+       for (member = bms_next_member(relids, -1);
+                member >= 0;
+                member = bms_next_member(relids, member))
+       {
+               RangeTblEntry *entry = rt_fetch(member, rtable);
+
+               /* Join RTEs are dropped; everything else is kept. */
+               if (entry->rtekind == RTE_JOIN)
+                       continue;
+
+               nonjoin_relids = bms_add_member(nonjoin_relids, member);
+       }
+
+       return nonjoin_relids;
+}
diff --git a/contrib/pg_plan_advice/pgpa_scan.h b/contrib/pg_plan_advice/pgpa_scan.h

new file mode 100644 (file)

index 0000000..90a08b4
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_scan.h
@@ -0,0 +1,86 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_scan.h
+ *       analysis of scans in Plan trees
+ *
+ * For purposes of this module, a "scan" includes (1) single plan nodes that
+ * scan multiple RTIs, such as a degenerate Result node that replaces what
+ * would otherwise have been a join, and (2) Append and MergeAppend nodes
+ * implementing a partitionwise scan or a partitionwise join. Said
+ * differently, scans are the leaves of the join tree for a single join
+ * problem.
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_scan.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_SCAN_H
+#define PGPA_SCAN_H
+
+#include "nodes/plannodes.h"
+
+typedef struct pgpa_plan_walker_context pgpa_plan_walker_context;
+
+/*
+ * Scan strategies.
+ *
+ * PGPA_SCAN_ORDINARY is any scan strategy that isn't interesting to us
+ * because there is no meaningful planner decision involved. For example,
+ * the only way to scan a subquery is a SubqueryScan, and the only way to
+ * scan a VALUES construct is a ValuesScan. We need not care exactly which
+ * type of planner node was used in such cases, because the same thing will
+ * happen when replanning.
+ *
+ * PGPA_SCAN_ORDINARY also includes Result nodes that correspond to scans
+ * or even joins that are proved empty. We don't know whether or not the scan
+ * or join will still be provably empty at replanning time, but if it is,
+ * then no scan-type advice is needed, and if it's not, we can't recommend
+ * a scan type based on the current plan.
+ *
+ * PGPA_SCAN_PARTITIONWISE also lumps together scans and joins: this can
+ * be either a partitionwise scan of a partitioned table or a partitionwise
+ * join between several partitioned tables. Note that all decisions about
+ * whether or not to use partitionwise join are meaningful: no matter what
+ * we decided this time, we could do more or fewer things partitionwise the
+ * next time.
+ *
+ * PGPA_SCAN_FOREIGN is only used when there's more than one relation involved;
+ * a single-table foreign scan is classified as ordinary, since there is no
+ * decision to make in that case.
+ *
+ * Other scan strategies map one-to-one to plan nodes.
+ */
+typedef enum
+{
+       PGPA_SCAN_ORDINARY = 0,
+       PGPA_SCAN_SEQ,
+       PGPA_SCAN_BITMAP_HEAP,
+       PGPA_SCAN_FOREIGN,
+       PGPA_SCAN_INDEX,
+       PGPA_SCAN_INDEX_ONLY,
+       PGPA_SCAN_PARTITIONWISE,
+       PGPA_SCAN_TID
+       /* update NUM_PGPA_SCAN_STRATEGY if you add anything here */
+} pgpa_scan_strategy;
+
+#define NUM_PGPA_SCAN_STRATEGY ((int) PGPA_SCAN_TID + 1)
+
+/*
+ * All of the details we need regarding a scan.
+ *
+ * Objects of this type are returned by pgpa_build_scan().
+ */
+typedef struct pgpa_scan
+{
+       /* plan node for this scan -- presumably the one given to pgpa_build_scan */
+       Plan       *plan;
+       /* how the scan is performed; see pgpa_scan_strategy */
+       pgpa_scan_strategy strategy;
+       /* relations covered by the scan -- presumably RTIs; confirm in pgpa_scan.c */
+       Bitmapset  *relids;
+       /* presumably true when some Gather-type node sits above this scan */
+       bool            beneath_any_gather;
+} pgpa_scan;
+
+extern pgpa_scan *pgpa_build_scan(pgpa_plan_walker_context *walker, Plan *plan,
+                                                                 ElidedNode *elided_node,
+                                                                 bool beneath_any_gather,
+                                                                 bool within_join_problem);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_scanner.l b/contrib/pg_plan_advice/pgpa_scanner.l

new file mode 100644 (file)

index 0000000..be7d7ba
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_scanner.l
@@ -0,0 +1,299 @@
+%top{
+/*
+ * Scanner for plan advice
+ *
+ * Copyright (c) 2000-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_scanner.l
+ */
+#include "postgres.h"
+
+#include "common/string.h"
+#include "nodes/miscnodes.h"
+#include "parser/scansup.h"
+
+#include "pgpa_ast.h"
+#include "pgpa_parser.h"
+
+/*
+ * Extra data that we pass around during scanning.
+ *
+ * 'litbuf' is used to implement the <xd> exclusive state, which handles
+ * double-quoted identifiers.
+ */
+typedef struct pgpa_yy_extra_type
+{
+       StringInfoData  litbuf;
+} pgpa_yy_extra_type;
+
+}
+
+%{
+/* LCOV_EXCL_START */
+
+#define YY_DECL \
+       extern int pgpa_yylex(union YYSTYPE *yylval_param, List **result, \
+                                                 char **parse_error_msg_p, yyscan_t yyscanner)
+
+/* No reason to constrain amount of data slurped */
+#define YY_READ_BUF_SIZE 16777216
+
+/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
+#undef fprintf
+#define fprintf(file, fmt, msg)  fprintf_to_ereport(fmt, msg)
+
+/*
+ * Stand-in for fprintf() within the generated scanner (see the #define just
+ * above): rather than printing 'msg', report it as an ERROR. 'fmt' is
+ * accepted only so that the arguments line up with the macro; it is not
+ * otherwise used.
+ */
+static void
+fprintf_to_ereport(const char *fmt, const char *msg)
+{
+       ereport(ERROR, (errmsg_internal("%s", msg)));
+}
+%}
+
+%option reentrant
+%option bison-bridge
+%option 8bit
+%option never-interactive
+%option nodefault
+%option noinput
+%option nounput
+%option noyywrap
+%option noyyalloc
+%option noyyrealloc
+%option noyyfree
+%option warn
+%option prefix="pgpa_yy"
+%option extra-type="pgpa_yy_extra_type *"
+
+/*
+ * What follows is a severely stripped-down version of the core scanner. We
+ * only care about recognizing identifiers with or without identifier quoting
+ * (i.e. double-quoting), decimal integers, and a small handful of other
+ * things. Keep these rules in sync with src/backend/parser/scan.l. As in that
+ * file, we use an exclusive state called 'xc' for C-style comments, and an
+ * exclusive state called 'xd' for double-quoted identifiers.
+ */
+%x xc
+%x xd
+
+ident_start            [A-Za-z\200-\377_]
+ident_cont             [A-Za-z\200-\377_0-9\$]
+
+identifier             {ident_start}{ident_cont}*
+
+decdigit               [0-9]
+decinteger             {decdigit}(_?{decdigit})*
+
+space                  [ \t\n\r\f\v]
+whitespace             {space}+
+
+dquote                 \"
+xdstart                        {dquote}
+xdstop                 {dquote}
+xddouble               {dquote}{dquote}
+xdinside               [^"]+
+
+xcstart                        \/\*
+xcstop                 \*+\/
+xcinside               [^*/]+
+
+%%
+
+{whitespace}   { /* ignore */ }
+
+{identifier}   {
+                                       char   *str;
+                                       bool    fail;
+                                       pgpa_advice_tag_type    tag;
+
+                                       /*
+                                        * Unlike the core scanner, we don't truncate identifiers
+                                        * here. There is no obvious reason to do so.
+                                        */
+                                       str = downcase_identifier(yytext, yyleng, false, false);
+                                       yylval->str = str;
+
+                                       /*
+                                        * If it's not a tag, just return TOK_IDENT; else, return
+                                        * a token type based on how further parsing should
+                                        * proceed.
+                                        */
+                                       tag = pgpa_parse_advice_tag(str, &fail);
+                                       if (fail)
+                                               return TOK_IDENT;
+                                       else if (tag == PGPA_TAG_JOIN_ORDER)
+                                               return TOK_TAG_JOIN_ORDER;
+                                       else if (tag == PGPA_TAG_INDEX_SCAN ||
+                                                        tag == PGPA_TAG_INDEX_ONLY_SCAN)
+                                               return TOK_TAG_INDEX;
+                                       else if (tag == PGPA_TAG_BITMAP_HEAP_SCAN)
+                                               return TOK_TAG_BITMAP;
+                                       else if (tag == PGPA_TAG_SEQ_SCAN ||
+                                                        tag == PGPA_TAG_TID_SCAN ||
+                                                        tag == PGPA_TAG_NO_GATHER)
+                                               return TOK_TAG_SIMPLE;
+                                       else
+                                               return TOK_TAG_GENERIC;
+                               }
+
+{decinteger}   {
+                                       char   *digits;
+                                       char   *endptr;
+                                       int             ndigits = 0;
+
+                                       /*
+                                        * The decinteger pattern permits '_' between digits, as
+                                        * in the core scanner, but strtoint() stops at the first
+                                        * character that is not a digit. Strip the separators
+                                        * before converting, so that input such as 1_000 is read
+                                        * as 1000 rather than being misreported as out of range.
+                                        */
+                                       digits = palloc(yyleng + 1);
+                                       for (int i = 0; yytext[i] != '\0'; i++)
+                                       {
+                                               if (yytext[i] != '_')
+                                                       digits[ndigits++] = yytext[i];
+                                       }
+                                       digits[ndigits] = '\0';
+
+                                       /*
+                                        * On failure, record the error but still return a token,
+                                        * so that the parser can carry on; the recorded message
+                                        * is what the caller will see.
+                                        */
+                                       errno = 0;
+                                       yylval->integer = strtoint(digits, &endptr, 10);
+                                       if (*endptr != '\0' || errno == ERANGE)
+                                               pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+                                                                        "integer out of range");
+                                       pfree(digits);
+                                       return TOK_INTEGER;
+                               }
+
+{xcstart}              {
+                                       BEGIN(xc);
+                               }
+
+{xdstart}              {
+                                       BEGIN(xd);
+                                       resetStringInfo(&yyextra->litbuf);
+                               }
+
+"||"                   { return TOK_OR; }
+
+"&&"                   { return TOK_AND; }
+
+.                              { return yytext[0]; }
+
+<xc>{xcstop}   {
+                                       BEGIN(INITIAL);
+                               }
+
+<xc>{xcinside} {
+                                       /* discard multiple characters without slash or asterisk */
+                               }
+
+<xc>.                  {
+                                       /*
+                                        * Discard any single character. flex prefers longer
+                                        * matches, so this rule will never be picked when we could
+                                        * have matched xcstop.
+                                        *
+                                        * NB: At present, we don't bother to support nested
+                                        * C-style comments here, but this logic could be extended
+                                        * if that restriction poses a problem.
+                                        */
+                               }
+
+<xc><<EOF>>            {
+                                       BEGIN(INITIAL);
+                                       pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+                                                                "unterminated comment");
+                               }
+
+<xd>{xdstop}   {
+                                       BEGIN(INITIAL);
+                                       yylval->str = pstrdup(yyextra->litbuf.data);
+                                       return TOK_IDENT;
+                               }
+
+<xd>{xddouble} {
+                                       appendStringInfoChar(&yyextra->litbuf, '"');
+                               }
+
+<xd>{xdinside} {
+                                       appendBinaryStringInfo(&yyextra->litbuf, yytext, yyleng);
+                               }
+
+<xd><<EOF>>            {
+                                       BEGIN(INITIAL);
+                                       pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+                                                                "unterminated quoted identifier");
+                               }
+
+%%
+
+/* LCOV_EXCL_STOP */
+
+/*
+ * Record an error raised while scanning or parsing advice.
+ *
+ * 'message' is the error text supplied by bison or by the scanner; the
+ * text of the current token is read through the 'yytext' macro. The two are
+ * combined into one string that is stored in *parse_error_msg_p, which is
+ * how the caller of pgpa_yyparse() learns of the failure. If
+ * *parse_error_msg_p is already set, it is left alone, so that only the
+ * first error of a parse operation is kept.
+ */
+void
+pgpa_yyerror(List **result, char **parse_error_msg_p, yyscan_t yyscanner,
+                        const char *message)
+{
+       /* the yytext macro needs a variable with this name */
+       struct yyguts_t *yyg = (struct yyguts_t *) yyscanner;
+
+       /* an earlier error takes precedence over this one */
+       if (*parse_error_msg_p != NULL)
+               return;
+
+       /* with no token text to quote, report the end of the input instead */
+       if (yytext[0] == '\0')
+               *parse_error_msg_p = psprintf("%s at end of input", message);
+       else
+               *parse_error_msg_p = psprintf("%s at or near \"%s\"", message, yytext);
+}
+
+/*
+ * Set up a scanner that will read advice from the string 'str'.
+ *
+ * This should be called before parsing begins. The new scanner is handed
+ * back to the caller through *yyscannerp.
+ */
+void
+pgpa_scanner_init(const char *str, yyscan_t *yyscannerp)
+{
+       pgpa_yy_extra_type      *extra;
+
+       /* per-scanner state; holds the buffer used for quoted identifiers */
+       extra = palloc0_object(pgpa_yy_extra_type);
+
+       if (yylex_init(yyscannerp) != 0)
+               elog(ERROR, "yylex_init() failed: %m");
+
+       initStringInfo(&extra->litbuf);
+       pgpa_yyset_extra(extra, *yyscannerp);
+
+       /* arrange for the scanner to read from 'str' */
+       yy_scan_string(str, *yyscannerp);
+}
+
+
+/*
+ * Shut down the advice scanner.
+ *
+ * This should be called after parsing is complete. It passes 'yyscanner' to
+ * yylex_destroy() and does nothing else; in particular, the
+ * pgpa_yy_extra_type set up by pgpa_scanner_init() is not released here
+ * (presumably it is left for memory context cleanup -- TODO confirm).
+ */
+void
+pgpa_scanner_finish(yyscan_t yyscanner)
+{
+       yylex_destroy(yyscanner);
+}
+
+/*
+ * Interface functions to make flex use palloc() instead of malloc().
+ * It'd be better to make these static, but flex insists otherwise.
+ */
+
+/* Allocation hook for flex: obtain 'size' bytes from palloc(). */
+void *
+yyalloc(yy_size_t size, yyscan_t yyscanner)
+{
+       return palloc(size);
+}
+
+/*
+ * Reallocation hook for flex. A NULL 'ptr' is treated as a request for a
+ * fresh allocation; otherwise the existing chunk is resized with repalloc().
+ */
+void *
+yyrealloc(void *ptr, yy_size_t size, yyscan_t yyscanner)
+{
+       if (ptr == NULL)
+               return palloc(size);
+
+       return repalloc(ptr, size);
+}
+
+/* Deallocation hook for flex; a NULL 'ptr' is silently ignored. */
+void
+yyfree(void *ptr, yyscan_t yyscanner)
+{
+       if (ptr == NULL)
+               return;
+
+       pfree(ptr);
+}
diff --git a/contrib/pg_plan_advice/pgpa_trove.c b/contrib/pg_plan_advice/pgpa_trove.c

new file mode 100644 (file)

index 0000000..a92121f
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_trove.c
@@ -0,0 +1,490 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_trove.c
+ *       All of the advice given for a particular query, appropriately
+ *    organized for convenient access.
+ *
+ * This name comes from the English expression "trove of advice", which
+ * means a collection of wisdom. This slightly unusual term is chosen to
+ * avoid naming confusion; for example, "collection of advice" would
+ * invite confusion with pgpa_collector.c. Note that, while we don't know
+ * whether the provided advice is actually wise, it's not our job to
+ * question the user's choices.
+ *
+ * The goal of this module is to make it easy to locate the specific
+ * bits of advice that pertain to any given part of a query, or to
+ * determine that there are none.
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_trove.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pgpa_trove.h"
+
+#include "common/hashfn_unstable.h"
+
+/*
+ * An advice trove is organized into a series of "slices", each of which
+ * contains information about one topic e.g. scan methods. Each slice consists
+ * of an array of trove entries plus a hash table that we can use to determine
+ * which ones are relevant to a particular part of the query.
+ */
+typedef struct pgpa_trove_slice
+{
+       /* allocated length of the 'entries' array */
+       unsigned        nallocated;
+       /* number of elements of 'entries' currently in use */
+       unsigned        nused;
+       /* the advice entries themselves, in the order they were added */
+       pgpa_trove_entry *entries;
+       /* maps a relation identifier key to offsets into 'entries' */
+       struct pgpa_trove_entry_hash *hash;
+} pgpa_trove_slice;
+
+/*
+ * Scan advice is stored into 'scan'; join advice is stored into 'join'; and
+ * advice that can apply to both cases is stored into 'rel'. This lets callers
+ * ask just for what's relevant. These slices correspond to the possible values
+ * of pgpa_trove_lookup_type.
+ */
+struct pgpa_trove
+{
+       pgpa_trove_slice join;
+       pgpa_trove_slice rel;
+       pgpa_trove_slice scan;
+};
+
+/*
+ * We're going to build a hash table to allow clients of this module to find
+ * relevant advice for a given part of the query quickly. However, we're going
+ * to use only three of the five key fields as hash keys. There are two reasons
+ * for this.
+ *
+ * First, it's allowable to set partition_schema to NULL to match a partition
+ * with the correct name in any schema.
+ *
+ * Second, we expect the "occurrence" and "partition_schema" portions of the
+ * relation identifiers to be mostly uninteresting. Most of the time, the
+ * occurrence field will be 1 and the partition_schema values will all be the
+ * same. Even when there is some variation, the absolute number of entries
+ * that have the same values for all three of these key fields should be
+ * quite small.
+ */
+typedef struct
+{
+       const char *alias_name;
+       const char *partition_name;
+       const char *plan_name;
+} pgpa_trove_entry_key;
+
+/*
+ * Hash table element: for one key, the offsets within the owning slice's
+ * 'entries' array of the entries whose targets mention that key.
+ */
+typedef struct
+{
+       pgpa_trove_entry_key key;
+       /* not referenced directly in this file; presumably for lib/simplehash.h */
+       int                     status;
+       /* offsets into the slice's 'entries' array */
+       Bitmapset  *indexes;
+} pgpa_trove_entry_element;
+
+static uint32 pgpa_trove_entry_hash_key(pgpa_trove_entry_key key);
+
+/*
+ * Equality test for hash keys: true only if all three fields agree.
+ *
+ * alias_name is compared with strcmp(), so it must not be NULL in either
+ * key. partition_name and plan_name may be NULL, so they are compared with
+ * strings_equal_or_both_null() instead.
+ */
+static inline bool
+pgpa_trove_entry_compare_key(pgpa_trove_entry_key a, pgpa_trove_entry_key b)
+{
+       return strcmp(a.alias_name, b.alias_name) == 0 &&
+               strings_equal_or_both_null(a.partition_name, b.partition_name) &&
+               strings_equal_or_both_null(a.plan_name, b.plan_name);
+}
+
+#define SH_PREFIX                      pgpa_trove_entry
+#define SH_ELEMENT_TYPE                pgpa_trove_entry_element
+#define SH_KEY_TYPE                    pgpa_trove_entry_key
+#define SH_KEY                         key
+#define SH_HASH_KEY(tb, key)   pgpa_trove_entry_hash_key(key)
+#define        SH_EQUAL(tb, a, b)      pgpa_trove_entry_compare_key(a, b)
+#define SH_SCOPE                       static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+static void pgpa_init_trove_slice(pgpa_trove_slice *tslice);
+static void pgpa_trove_add_to_slice(pgpa_trove_slice *tslice,
+                                                                       pgpa_advice_tag_type tag,
+                                                                       pgpa_advice_target *target);
+static void pgpa_trove_add_to_hash(pgpa_trove_entry_hash *hash,
+                                                                  pgpa_advice_target *target,
+                                                                  int index);
+static Bitmapset *pgpa_trove_slice_lookup(pgpa_trove_slice *tslice,
+                                                                                 pgpa_identifier *rid);
+
+/*
+ * Build a trove of advice from a list of advice items.
+ *
+ * Caller can obtain a list of advice items to pass to this function by
+ * calling pgpa_parse().
+ *
+ * Each item is filed into one of the trove's three slices according to its
+ * tag: scan-method advice goes into 'scan', join-order and join-method
+ * advice into 'join', and advice that can apply to either a scan or a join
+ * into 'rel'. The newly allocated trove is returned.
+ */
+pgpa_trove *
+pgpa_build_trove(List *advice_items)
+{
+       /* not zeroed here; each of the three slices is initialized just below */
+       pgpa_trove *trove = palloc_object(pgpa_trove);
+
+       pgpa_init_trove_slice(&trove->join);
+       pgpa_init_trove_slice(&trove->rel);
+       pgpa_init_trove_slice(&trove->scan);
+
+       foreach_ptr(pgpa_advice_item, item, advice_items)
+       {
+               switch (item->tag)
+               {
+                       case PGPA_TAG_JOIN_ORDER:
+                               {
+                                       pgpa_advice_target *target;
+
+                                       /*
+                                        * For most advice types, each element in the top-level
+                                        * list is a separate target, but it's most convenient to
+                                        * regard the entirety of a JOIN_ORDER specification as a
+                                        * single target. Since it wasn't represented that way
+                                        * during parsing, build a surrogate object now.
+                                        */
+                                       target = palloc0_object(pgpa_advice_target);
+                                       target->ttype = PGPA_TARGET_ORDERED_LIST;
+                                       target->children = item->targets;
+
+                                       pgpa_trove_add_to_slice(&trove->join,
+                                                                                       item->tag, target);
+                               }
+                               break;
+
+                       case PGPA_TAG_BITMAP_HEAP_SCAN:
+                       case PGPA_TAG_INDEX_ONLY_SCAN:
+                       case PGPA_TAG_INDEX_SCAN:
+                       case PGPA_TAG_SEQ_SCAN:
+                       case PGPA_TAG_TID_SCAN:
+
+                               /*
+                                * Scan advice.
+                                */
+                               foreach_ptr(pgpa_advice_target, target, item->targets)
+                               {
+                                       /*
+                                        * For now, all of our scan types target single relations,
+                                        * but in the future this might not be true, e.g. a custom
+                                        * scan could replace a join.
+                                        */
+                                       Assert(target->ttype == PGPA_TARGET_IDENTIFIER);
+                                       pgpa_trove_add_to_slice(&trove->scan,
+                                                                                       item->tag, target);
+                               }
+                               break;
+
+                       case PGPA_TAG_FOREIGN_JOIN:
+                       case PGPA_TAG_HASH_JOIN:
+                       case PGPA_TAG_MERGE_JOIN_MATERIALIZE:
+                       case PGPA_TAG_MERGE_JOIN_PLAIN:
+                       case PGPA_TAG_NESTED_LOOP_MATERIALIZE:
+                       case PGPA_TAG_NESTED_LOOP_MEMOIZE:
+                       case PGPA_TAG_NESTED_LOOP_PLAIN:
+                       case PGPA_TAG_SEMIJOIN_NON_UNIQUE:
+                       case PGPA_TAG_SEMIJOIN_UNIQUE:
+
+                               /*
+                                * Join strategy advice.
+                                */
+                               foreach_ptr(pgpa_advice_target, target, item->targets)
+                               {
+                                       pgpa_trove_add_to_slice(&trove->join,
+                                                                                       item->tag, target);
+                               }
+                               break;
+
+                       case PGPA_TAG_PARTITIONWISE:
+                       case PGPA_TAG_GATHER:
+                       case PGPA_TAG_GATHER_MERGE:
+                       case PGPA_TAG_NO_GATHER:
+
+                               /*
+                                * Advice about a RelOptInfo relevant to both scans and joins.
+                                */
+                               foreach_ptr(pgpa_advice_target, target, item->targets)
+                               {
+                                       pgpa_trove_add_to_slice(&trove->rel,
+                                                                                       item->tag, target);
+                               }
+                               break;
+               }
+       }
+
+       return trove;
+}
+
+/*
+ * Find the trove entries relevant to a set of relation identifiers.
+ *
+ * 'type' selects which slice of 'trove' is searched, and 'rids' is an array
+ * of 'nrids' identifiers, which must not be empty. On return,
+ * result->entries points to the slice's entry array, and result->indexes
+ * holds the offsets of the entries that matched at least one identifier.
+ */
+void
+pgpa_trove_lookup(pgpa_trove *trove, pgpa_trove_lookup_type type,
+                                 int nrids, pgpa_identifier *rids, pgpa_trove_result *result)
+{
+       pgpa_trove_slice *tslice;
+       Bitmapset  *matches;
+       int                     i;
+
+       Assert(nrids > 0);
+
+       /* Anything other than a scan or join lookup uses the 'rel' slice. */
+       switch (type)
+       {
+               case PGPA_TROVE_LOOKUP_SCAN:
+                       tslice = &trove->scan;
+                       break;
+               case PGPA_TROVE_LOOKUP_JOIN:
+                       tslice = &trove->join;
+                       break;
+               default:
+                       tslice = &trove->rel;
+                       break;
+       }
+
+       /* Start with the first identifier, then fold in the remaining ones. */
+       matches = pgpa_trove_slice_lookup(tslice, &rids[0]);
+       for (i = 1; i < nrids; i++)
+       {
+               Bitmapset  *more_matches;
+
+               /*
+                * All of the identifiers are expected to belong to the same
+                * subquery; a mismatch here indicates a mistake by the caller.
+                */
+               Assert(strings_equal_or_both_null(rids[0].plan_name,
+                                                                                 rids[i].plan_name));
+
+               more_matches = pgpa_trove_slice_lookup(tslice, &rids[i]);
+               matches = bms_union(matches, more_matches);
+       }
+
+       result->entries = tslice->entries;
+       result->indexes = matches;
+}
+
+/*
+ * Hand back every entry in one slice of a trove.
+ *
+ * 'trove' and 'type' are inputs; 'type' selects the slice. On return,
+ * *entries points to the slice's entry array and *nentries is the number of
+ * elements of that array that are in use.
+ */
+void
+pgpa_trove_lookup_all(pgpa_trove *trove, pgpa_trove_lookup_type type,
+                                         pgpa_trove_entry **entries, int *nentries)
+{
+       pgpa_trove_slice *tslice;
+
+       /* Anything other than a scan or join lookup uses the 'rel' slice. */
+       switch (type)
+       {
+               case PGPA_TROVE_LOOKUP_SCAN:
+                       tslice = &trove->scan;
+                       break;
+               case PGPA_TROVE_LOOKUP_JOIN:
+                       tslice = &trove->join;
+                       break;
+               default:
+                       tslice = &trove->rel;
+                       break;
+       }
+
+       *nentries = tslice->nused;
+       *entries = tslice->entries;
+}
+
+/*
+ * Render a trove entry as an item of plan advice that would produce it.
+ *
+ * The result is the tag name followed by the formatted target and, if the
+ * target has one, the formatted index target. The string is newly built
+ * and is returned to the caller.
+ */
+char *
+pgpa_cstring_trove_entry(pgpa_trove_entry *entry)
+{
+       StringInfoData buf;
+       bool            is_join_order = (entry->tag == PGPA_TAG_JOIN_ORDER);
+
+       initStringInfo(&buf);
+       appendStringInfo(&buf, "%s", pgpa_cstring_advice_tag(entry->tag));
+
+       /*
+        * pgpa_build_trove wraps a JOIN_ORDER item in a single ordered-list
+        * target, so no parentheses are added here for that tag; every other
+        * tag gets them around its target.
+        */
+       if (is_join_order)
+               Assert(entry->target->ttype == PGPA_TARGET_ORDERED_LIST);
+       else
+               appendStringInfoChar(&buf, '(');
+
+       pgpa_format_advice_target(&buf, entry->target);
+
+       /* an index target, when present, follows after a single space */
+       if (entry->target->itarget != NULL)
+       {
+               appendStringInfoChar(&buf, ' ');
+               pgpa_format_index_target(&buf, entry->target->itarget);
+       }
+
+       if (!is_join_order)
+               appendStringInfoChar(&buf, ')');
+
+       return buf.data;
+}
+
+/*
+ * OR 'flags' (a mask of PGPA_TE_* bits) into each element of 'entries'
+ * whose offset is a member of 'indexes'.
+ */
+void
+pgpa_trove_set_flags(pgpa_trove_entry *entries, Bitmapset *indexes, int flags)
+{
+       int                     idx;
+
+       for (idx = bms_next_member(indexes, -1); idx >= 0;
+                idx = bms_next_member(indexes, idx))
+               entries[idx].flags |= flags;
+}
+
+/*
+ * Append an advice target to a pgpa_trove_slice, and record it in the
+ * slice's hash table.
+ *
+ * The entries array is doubled in size whenever it is found to be full.
+ */
+static void
+pgpa_trove_add_to_slice(pgpa_trove_slice *tslice,
+                                               pgpa_advice_tag_type tag,
+                                               pgpa_advice_target *target)
+{
+       pgpa_trove_entry *slot;
+
+       /* Make room for one more entry, if required. */
+       if (tslice->nused >= tslice->nallocated)
+       {
+               int                     doubled = tslice->nallocated * 2;
+
+               tslice->entries = repalloc_array(tslice->entries, pgpa_trove_entry,
+                                                                                doubled);
+               tslice->nallocated = doubled;
+       }
+
+       /* Fill in the next unused array slot. */
+       slot = &tslice->entries[tslice->nused];
+       slot->tag = tag;
+       slot->target = target;
+       slot->flags = 0;
+
+       /* The hash table refers to the entry by its offset in the array. */
+       pgpa_trove_add_to_hash(tslice->hash, target, tslice->nused);
+
+       tslice->nused++;
+}
+
+/*
+ * Update the hash table for a newly-added advice target.
+ *
+ * 'index' is the offset of the new entry in the slice's entries array. If
+ * 'target' is a relation identifier, 'index' is added to the set stored
+ * under that identifier's key; otherwise, this function recurses so that
+ * the same 'index' is recorded for every identifier nested inside 'target'.
+ */
+static void
+pgpa_trove_add_to_hash(pgpa_trove_entry_hash *hash, pgpa_advice_target *target,
+                                          int index)
+{
+       pgpa_trove_entry_key key;
+       pgpa_trove_entry_element *element;
+       bool            found;
+
+       /* For non-identifiers, add entries for all descendants. */
+       if (target->ttype != PGPA_TARGET_IDENTIFIER)
+       {
+               foreach_ptr(pgpa_advice_target, child_target, target->children)
+               {
+                       pgpa_trove_add_to_hash(hash, child_target, index);
+               }
+               return;
+       }
+
+       /* Sanity checks. */
+       Assert(target->rid.occurrence > 0);
+       Assert(target->rid.alias_name != NULL);
+
+       /* Add an entry for this relation identifier. */
+       key.alias_name = target->rid.alias_name;
+       key.partition_name = target->rid.partrel;
+       key.plan_name = target->rid.plan_name;
+       element = pgpa_trove_entry_insert(hash, key, &found);
+
+       /*
+        * simplehash fills in only the key and status of a newly created
+        * element, and the slot it hands back may still hold leftovers from an
+        * element that was moved aside to make room. Start from an empty set
+        * rather than trusting whatever happens to be in 'indexes'.
+        */
+       if (!found)
+               element->indexes = NULL;
+       element->indexes = bms_add_member(element->indexes, index);
+}
+
+/*
+ * Initialize a pgpa_trove_slice to an empty state.
+ */
+static void
+pgpa_init_trove_slice(pgpa_trove_slice *tslice)
+{
+       /*
+        * Both the entries array and the hash table start out at this size. A
+        * larger value would mean fewer enlargements for long advice strings,
+        * but it would also make every slice more expensive to set up, since
+        * an amount of memory proportional to the size has to be zeroed.
+        * Advice strings are expected to be relatively short more often than
+        * not, so start small.
+        */
+       const unsigned initial_size = 16;
+
+       tslice->nallocated = initial_size;
+       tslice->nused = 0;
+       tslice->entries = palloc_array(pgpa_trove_entry, initial_size);
+       tslice->hash = pgpa_trove_entry_create(CurrentMemoryContext,
+                                                                                  initial_size, NULL);
+}
+
+/*
+ * Hash a key made up of alias_name, partition_name, and plan_name.
+ *
+ * The strings are fed to fasthash in that order; a NULL partition_name or
+ * plan_name is simply skipped.
+ */
+static uint32
+pgpa_trove_entry_hash_key(pgpa_trove_entry_key key)
+{
+       fasthash_state state;
+       int                     total_len;
+
+       fasthash_init(&state, 0);
+
+       /* alias_name is required, so it can be accumulated unconditionally */
+       total_len = fasthash_accum_cstring(&state, key.alias_name);
+
+       /* the remaining two fields are optional */
+       if (key.partition_name != NULL)
+               total_len += fasthash_accum_cstring(&state, key.partition_name);
+       if (key.plan_name != NULL)
+               total_len += fasthash_accum_cstring(&state, key.plan_name);
+
+       /*
+        * hashfn_unstable.h recommends the string length as the tweak value.
+        * With more than one string involved, it isn't clear what the right
+        * choice is, so for now the sum of the lengths is used.
+        */
+       return fasthash_final32(&state, total_len);
+}
+
+/*
+ * Return the offsets of the entries in 'tslice' that match 'rid'.
+ *
+ * The hash table narrows the search to entries that mention an identifier
+ * with the same alias, partition, and plan names; each of those candidates
+ * is then checked with pgpa_identifier_matches_target(). The result is NULL
+ * if nothing matches.
+ */
+static Bitmapset *
+pgpa_trove_slice_lookup(pgpa_trove_slice *tslice, pgpa_identifier *rid)
+{
+       pgpa_trove_entry_key key;
+       pgpa_trove_entry_element *element;
+       Bitmapset  *matches = NULL;
+       int                     i;
+
+       Assert(rid->occurrence >= 1);
+
+       key.alias_name = rid->alias_name;
+       key.partition_name = rid->partrel;
+       key.plan_name = rid->plan_name;
+
+       element = pgpa_trove_entry_lookup(tslice->hash, key);
+       if (element == NULL)
+               return NULL;
+
+       for (i = bms_next_member(element->indexes, -1); i >= 0;
+                i = bms_next_member(element->indexes, i))
+       {
+               /*
+                * The candidate's target, or something nested within it, agrees
+                * with 'rid' on the three key fields above, but we don't yet know
+                * which descendant that is, nor whether the occurrence and
+                * schema also match.
+                */
+               if (pgpa_identifier_matches_target(rid, tslice->entries[i].target))
+                       matches = bms_add_member(matches, i);
+       }
+
+       return matches;
+}
diff --git a/contrib/pg_plan_advice/pgpa_trove.h b/contrib/pg_plan_advice/pgpa_trove.h

new file mode 100644 (file)

index 0000000..479c3f7
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_trove.h
@@ -0,0 +1,113 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_trove.h
+ *       All of the advice given for a particular query, appropriately
+ *    organized for convenient access.
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_trove.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_TROVE_H
+#define PGPA_TROVE_H
+
+#include "pgpa_ast.h"
+
+#include "nodes/bitmapset.h"
+
+typedef struct pgpa_trove pgpa_trove;
+
+/*
+ * Flags that can be set on a pgpa_trove_entry to indicate what happened when
+ * trying to plan using advice.
+ *
+ * PGPA_TE_MATCH_PARTIAL means that we found some part of the query that at
+ * least partially matched the target; e.g. given JOIN_ORDER(a b), this would
+ * be set if we ever saw any joinrel including either "a" or "b".
+ *
+ * PGPA_TE_MATCH_FULL means that we found an exact match for the target; e.g.
+ * given JOIN_ORDER(a b), this would be set if we saw a joinrel containing
+ * exactly "a" and "b" and nothing else.
+ *
+ * PGPA_TE_INAPPLICABLE means that the advice doesn't properly apply to the
+ * target; e.g. INDEX_SCAN(foo bar_idx) would be so marked if bar_idx does not
+ * exist on foo. The fact that this bit has been set does not mean that the
+ * advice had no effect.
+ *
+ * PGPA_TE_CONFLICTING means that a conflict was detected between what this
+ * advice wants and what some other plan advice wants; e.g. JOIN_ORDER(a b)
+ * would conflict with HASH_JOIN(a), because the former requires "a" to be the
+ * outer table while the latter requires it to be the inner table.
+ *
+ * PGPA_TE_FAILED means that the resulting plan did not conform to the advice.
+ */
+#define PGPA_TE_MATCH_PARTIAL          0x0001
+#define PGPA_TE_MATCH_FULL                     0x0002
+#define PGPA_TE_INAPPLICABLE           0x0004
+#define PGPA_TE_CONFLICTING                    0x0008
+#define PGPA_TE_FAILED                         0x0010
+
+/*
+ * Each entry in a trove of advice represents the application of a tag to
+ * a single target.
+ */
+typedef struct pgpa_trove_entry
+{
+       pgpa_advice_tag_type tag;       /* tag being applied */
+       pgpa_advice_target *target;     /* the single target it is applied to */
+       int                     flags;          /* PGPA_TE_* flag bits */
+} pgpa_trove_entry;
+
+/*
+ * What kind of information does the caller want to find in a trove?
+ *
+ * PGPA_TROVE_LOOKUP_SCAN means we're looking for scan advice.
+ *
+ * PGPA_TROVE_LOOKUP_JOIN means we're looking for join-related advice.
+ * This includes join order advice, join method advice, and semijoin-uniqueness
+ * advice.
+ *
+ * PGPA_TROVE_LOOKUP_REL means we're looking for general advice about
+ * a RelOptInfo that may correspond to either a scan or a join. This includes
+ * gather-related advice and partitionwise advice. Note that partitionwise
+ * advice might seem like join advice, but that's not a helpful way of viewing
+ * the matter because (1) partitionwise advice is also relevant at the scan
+ * level and (2) other types of join advice affect only what to do from
+ * join_path_setup_hook, but partitionwise advice affects what to do in
+ * joinrel_setup_hook.
+ */
+typedef enum pgpa_trove_lookup_type
+{
+       PGPA_TROVE_LOOKUP_JOIN,         /* join order, join method, semijoin-uniqueness */
+       PGPA_TROVE_LOOKUP_REL,          /* gather-related and partitionwise advice */
+       PGPA_TROVE_LOOKUP_SCAN          /* scan advice */
+} pgpa_trove_lookup_type;
+
+/*
+ * This struct is used to store the result of a trove lookup. For each member
+ * of "indexes", the entry at the corresponding offset within "entries" is one
+ * of the results.
+ */
+typedef struct pgpa_trove_result
+{
+       pgpa_trove_entry *entries;      /* array that members of "indexes" offset into */
+       Bitmapset  *indexes;            /* offsets of the entries that are results */
+} pgpa_trove_result;
+
+extern pgpa_trove *pgpa_build_trove(List *advice_items);
+extern void pgpa_trove_lookup(pgpa_trove *trove,
+                                                         pgpa_trove_lookup_type type,
+                                                         int nrids,
+                                                         pgpa_identifier *rids,
+                                                         pgpa_trove_result *result);
+extern void pgpa_trove_lookup_all(pgpa_trove *trove,
+                                                                 pgpa_trove_lookup_type type,
+                                                                 pgpa_trove_entry **entries,
+                                                                 int *nentries);
+extern char *pgpa_cstring_trove_entry(pgpa_trove_entry *entry);
+extern void pgpa_trove_set_flags(pgpa_trove_entry *entries,
+                                                                Bitmapset *indexes, int flags);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_walker.c b/contrib/pg_plan_advice/pgpa_walker.c

new file mode 100644 (file)

index 0000000..7e4e388
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_walker.c
@@ -0,0 +1,862 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_walker.c
+ *       Plan tree iteration
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_walker.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pgpa_join.h"
+#include "pgpa_scan.h"
+#include "pgpa_walker.h"
+
+#include "nodes/plannodes.h"
+
+static void pgpa_walk_recursively(pgpa_plan_walker_context *walker, Plan *plan,
+                                                                 bool within_join_problem,
+                                                                 pgpa_join_unroller *join_unroller,
+                                                                 List *active_query_features,
+                                                                 bool beneath_any_gather);
+static Bitmapset *pgpa_process_unrolled_join(pgpa_plan_walker_context *walker,
+                                                                                        pgpa_unrolled_join *ujoin);
+
+static pgpa_query_feature *pgpa_add_feature(pgpa_plan_walker_context *walker,
+                                                                                       pgpa_qf_type type,
+                                                                                       Plan *plan);
+
+static void pgpa_qf_add_rti(List *active_query_features, Index rti);
+static void pgpa_qf_add_rtis(List *active_query_features, Bitmapset *relids);
+static void pgpa_qf_add_plan_rtis(List *active_query_features, Plan *plan);
+
+static bool pgpa_walker_join_order_matches(pgpa_unrolled_join *ujoin,
+                                                                                  Index rtable_length,
+                                                                                  pgpa_identifier *rt_identifiers,
+                                                                                  pgpa_advice_target *target,
+                                                                                  bool toplevel);
+static bool pgpa_walker_join_order_matches_member(pgpa_join_member *member,
+                                                                                                 Index rtable_length,
+                                                                                                 pgpa_identifier *rt_identifiers,
+                                                                                                 pgpa_advice_target *target);
+static bool pgpa_walker_contains_scan(pgpa_plan_walker_context *walker,
+                                                                         pgpa_scan_strategy strategy,
+                                                                         Bitmapset *relids);
+static bool pgpa_walker_contains_feature(pgpa_plan_walker_context *walker,
+                                                                                pgpa_qf_type type,
+                                                                                Bitmapset *relids);
+static bool pgpa_walker_contains_join(pgpa_plan_walker_context *walker,
+                                                                         pgpa_join_strategy strategy,
+                                                                         Bitmapset *relids);
+static bool pgpa_walker_contains_no_gather(pgpa_plan_walker_context *walker,
+                                                                                  Bitmapset *relids);
+static Index pgpa_walker_get_rti(Index rtable_length,
+                                                                pgpa_identifier *rt_identifiers,
+                                                                pgpa_identifier *rid);
+
+/*
+ * Top-level entrypoint for the plan tree walk.
+ *
+ * Populates walker based on a traversal of the Plan trees in pstmt.
+ */
+void
+pgpa_plan_walker(pgpa_plan_walker_context *walker, PlannedStmt *pstmt)
+{
+       /* Start from an all-zeroes context that remembers the statement. */
+       memset(walker, 0, sizeof(pgpa_plan_walker_context));
+       walker->pstmt = pstmt;
+
+       /* The main plan tree is walked first. */
+       pgpa_walk_recursively(walker, pstmt->planTree, false, NULL, NIL, false);
+
+       /*
+        * The walk above does not reach subplans, so each of them gets a walk
+        * of its own. NULL list entries are skipped.
+        */
+       foreach_ptr(Plan, subplan, pstmt->subplans)
+       {
+               if (subplan == NULL)
+                       continue;
+               pgpa_walk_recursively(walker, subplan, false, NULL, NIL, false);
+       }
+}
+
+/*
+ * Main workhorse for the plan tree walk.
+ *
+ * If within_join_problem is true, we encountered a join at some higher level
+ * of the tree walk and haven't yet descended out of the portion of the plan
+ * tree that is part of that same join problem. We're no longer in the same
+ * join problem if (1) we cross into a different subquery or (2) we descend
+ * through an Append or MergeAppend node, below which any further joins would
+ * be partitionwise joins planned separately from the outer join problem.
+ *
+ * If join_unroller != NULL, the join unroller code expects us to find a join
+ * that should be unrolled into that object. This implies that we're within a
+ * join problem, but the reverse is not true: when we've traversed all the
+ * joins but are still looking for the scan that is the leaf of the join tree,
+ * join_unroller will be NULL but within_join_problem will be true.
+ *
+ * Each element of active_query_features corresponds to some item of advice
+ * that needs to enumerate all the relations it affects. We add RTIs we find
+ * during tree traversal to each of these query features.
+ *
+ * If beneath_any_gather == true, some higher level of the tree traversal found
+ * a Gather or Gather Merge node.
+ */
+static void
+pgpa_walk_recursively(pgpa_plan_walker_context *walker, Plan *plan,
+                                         bool within_join_problem,
+                                         pgpa_join_unroller *join_unroller,
+                                         List *active_query_features,
+                                         bool beneath_any_gather)
+{
+       pgpa_join_unroller *outer_join_unroller = NULL;
+       pgpa_join_unroller *inner_join_unroller = NULL;
+       bool            join_unroller_toplevel = false;
+       List       *pushdown_query_features = NIL;
+       ListCell   *lc;
+       List       *extraplans = NIL;
+       List       *elided_nodes = NIL;
+
+       Assert(within_join_problem || join_unroller == NULL);
+
+       /*
+        * If this is a Gather or Gather Merge node, directly add it to the list
+        * of currently-active query features.
+        *
+        * Otherwise, check the future_query_features list to see whether this was
+        * previously identified as a plan node that needs to be treated as a
+        * query feature.
+        *
+        * Note that the caller also has a pointer to active_query_features, so we
+        * can't destructively modify it without making a copy.
+        */
+       if (IsA(plan, Gather))
+       {
+               active_query_features =
+                       lappend(list_copy(active_query_features),
+                                       pgpa_add_feature(walker, PGPAQF_GATHER, plan));
+               beneath_any_gather = true;
+       }
+       else if (IsA(plan, GatherMerge))
+       {
+               active_query_features =
+                       lappend(list_copy(active_query_features),
+                                       pgpa_add_feature(walker, PGPAQF_GATHER_MERGE, plan));
+               beneath_any_gather = true;
+       }
+       else
+       {
+               foreach_ptr(pgpa_query_feature, qf, walker->future_query_features)
+               {
+                       if (qf->plan == plan)
+                       {
+                               active_query_features = list_copy(active_query_features);
+                               active_query_features = lappend(active_query_features, qf);
+
+                               /*
+                                * The list holds pgpa_query_feature pointers, so it is the
+                                * feature, not the Plan, that must be deleted. We leave the
+                                * loop immediately afterward, so modifying the list while
+                                * iterating over it is safe.
+                                */
+                               walker->future_query_features =
+                                       list_delete_ptr(walker->future_query_features, qf);
+                               break;
+                       }
+               }
+       }
+
+       /*
+        * Find all elided nodes for this Plan node.
+        */
+       foreach_node(ElidedNode, n, walker->pstmt->elidedNodes)
+       {
+               if (n->plan_node_id == plan->plan_node_id)
+                       elided_nodes = lappend(elided_nodes, n);
+       }
+
+       /* If we found any elided_nodes, handle them. */
+       if (elided_nodes != NIL)
+       {
+               int                     num_elided_nodes = list_length(elided_nodes);
+               ElidedNode *last_elided_node;
+
+               /*
+                * RTIs for the final -- and thus logically uppermost -- elided node
+                * should be collected for query features passed down by the caller.
+                * However, elided nodes act as barriers to query features, which
+                * means that (1) the remaining elided nodes, if any, should be
+                * ignored for purposes of query features and (2) the list of active
+                * query features should be reset to empty so that we do not add RTIs
+                * from the plan node that is logically beneath the elided node to the
+                * query features passed down from the caller.
+                */
+               last_elided_node = list_nth(elided_nodes, num_elided_nodes - 1);
+               pgpa_qf_add_rtis(active_query_features, last_elided_node->relids);
+               active_query_features = NIL;
+
+               /*
+                * If we're within a join problem, the join_unroller is responsible
+                * for building the scan for the final elided node, so throw it out.
+                */
+               if (within_join_problem)
+                       elided_nodes = list_truncate(elided_nodes, num_elided_nodes - 1);
+
+               /* Build scans for all (or the remaining) elided nodes. */
+               foreach_node(ElidedNode, elided_node, elided_nodes)
+               {
+                       (void) pgpa_build_scan(walker, plan, elided_node,
+                                                                  beneath_any_gather, within_join_problem);
+               }
+
+               /*
+                * If there were any elided nodes, then everything beneath those nodes
+                * is not part of the same join problem.
+                *
+                * In more detail, if an Append or MergeAppend was elided, then a
+                * partitionwise join was chosen and only a single child survived; if
+                * a SubqueryScan was elided, the subquery was planned without
+                * flattening it into the parent.
+                */
+               within_join_problem = false;
+               join_unroller = NULL;
+       }
+
+       /*
+        * If we're within a join problem, the join unroller is responsible for
+        * building any required scan for this node. If not, we do it here.
+        */
+       if (!within_join_problem)
+               (void) pgpa_build_scan(walker, plan, NULL, beneath_any_gather, false);
+
+       /*
+        * If this join needs to be unrolled but there's no join unroller already
+        * available, create one.
+        */
+       if (join_unroller == NULL && pgpa_is_join(plan))
+       {
+               join_unroller = pgpa_create_join_unroller();
+               join_unroller_toplevel = true;
+               within_join_problem = true;
+       }
+
+       /*
+        * If this join is to be unrolled, pgpa_unroll_join() will return the join
+        * unroller object that should be passed down when we recurse into the
+        * outer and inner sides of the plan.
+        */
+       if (join_unroller != NULL)
+               pgpa_unroll_join(walker, plan, beneath_any_gather, join_unroller,
+                                                &outer_join_unroller, &inner_join_unroller);
+
+       /* Add RTIs from the plan node to all active query features. */
+       pgpa_qf_add_plan_rtis(active_query_features, plan);
+
+       /*
+        * Recurse into the outer and inner subtrees.
+        *
+        * As an exception, if this is a ForeignScan, don't recurse. postgres_fdw
+        * sometimes stores an EPQ recheck plan in plan->lefttree, but that's going
+        * to mention the same set of relations as the ForeignScan itself, and we
+        * have no way to emit advice targeting the EPQ case vs. the non-EPQ case.
+        * Moreover, it's not entirely clear what other FDWs might do with the
+        * left and right subtrees. Maybe some better handling is needed here, but
+        * for now, we just punt.
+        */
+       if (!IsA(plan, ForeignScan))
+       {
+               if (plan->lefttree != NULL)
+                       pgpa_walk_recursively(walker, plan->lefttree, within_join_problem,
+                                                                 outer_join_unroller, active_query_features,
+                                                                 beneath_any_gather);
+               if (plan->righttree != NULL)
+                       pgpa_walk_recursively(walker, plan->righttree, within_join_problem,
+                                                                 inner_join_unroller, active_query_features,
+                                                                 beneath_any_gather);
+       }
+
+       /*
+        * If we created a join unroller up above, then it's also our job to use
+        * it to build the final pgpa_unrolled_join, and to destroy the object.
+        */
+       if (join_unroller_toplevel)
+       {
+               pgpa_unrolled_join *ujoin;
+
+               ujoin = pgpa_build_unrolled_join(walker, join_unroller);
+               walker->toplevel_unrolled_joins =
+                       lappend(walker->toplevel_unrolled_joins, ujoin);
+               pgpa_destroy_join_unroller(join_unroller);
+               (void) pgpa_process_unrolled_join(walker, ujoin);
+       }
+
+       /*
+        * Some plan types can have additional children. Nodes like Append that
+        * can have any number of children store them in a List; a SubqueryScan
+        * just has a field for a single additional Plan.
+        */
+       switch (nodeTag(plan))
+       {
+               case T_Append:
+                       {
+                               Append     *aplan = (Append *) plan;
+
+                               extraplans = aplan->appendplans;
+                               if (bms_is_empty(aplan->apprelids))
+                                       pushdown_query_features = active_query_features;
+                       }
+                       break;
+               case T_MergeAppend:
+                       {
+                               MergeAppend *maplan = (MergeAppend *) plan;
+
+                               extraplans = maplan->mergeplans;
+                               if (bms_is_empty(maplan->apprelids))
+                                       pushdown_query_features = active_query_features;
+                       }
+                       break;
+               case T_BitmapAnd:
+                       extraplans = ((BitmapAnd *) plan)->bitmapplans;
+                       break;
+               case T_BitmapOr:
+                       extraplans = ((BitmapOr *) plan)->bitmapplans;
+                       break;
+               case T_SubqueryScan:
+
+                       /*
+                        * We don't pass down active_query_features across here, because
+                        * those are specific to a subquery level.
+                        */
+                       pgpa_walk_recursively(walker, ((SubqueryScan *) plan)->subplan,
+                                                                 false, NULL, NIL, beneath_any_gather);
+                       break;
+               case T_CustomScan:
+                       extraplans = ((CustomScan *) plan)->custom_plans;
+                       break;
+               default:
+                       break;
+       }
+
+       /* If we found a list of extra children, iterate over it. */
+       foreach(lc, extraplans)
+       {
+               Plan       *subplan = lfirst(lc);
+
+               pgpa_walk_recursively(walker, subplan, false, NULL,
+                                                         pushdown_query_features, beneath_any_gather);
+       }
+}
+
+/*
+ * Perform final processing of a newly-constructed pgpa_unrolled_join. This
+ * only needs to be called for toplevel pgpa_unrolled_join objects, since it
+ * recurses to sub-joins as needed.
+ *
+ * Our goal is to add the set of inner relids to the relevant join_strategies
+ * list, and to do the same for any sub-joins. To that end, the return value
+ * is the set of relids found beneath the inner side of the join, but it is
+ * expected that the toplevel caller will ignore this.
+ */
+static Bitmapset *
+pgpa_process_unrolled_join(pgpa_plan_walker_context *walker,
+                                                  pgpa_unrolled_join *ujoin)
+{
+       Bitmapset  *inner_relids = NULL;
+       int                     i = 0;
+
+       while (i < ujoin->ninner)
+       {
+               pgpa_join_member *cur = &ujoin->inner[i];
+               Bitmapset  *cur_relids;
+
+               /* A nested join is handled by recursion; otherwise use the scan. */
+               if (cur->unrolled_join == NULL)
+               {
+                       Assert(cur->scan != NULL);
+                       cur_relids = cur->scan->relids;
+               }
+               else
+                       cur_relids = pgpa_process_unrolled_join(walker,
+                                                                                                       cur->unrolled_join);
+
+               /* Append this member's relids to the list chosen by strategy[i]. */
+               walker->join_strategies[ujoin->strategy[i]] =
+                       lappend(walker->join_strategies[ujoin->strategy[i]], cur_relids);
+
+               /* Accumulate the union that is handed back to our caller. */
+               inner_relids = bms_add_members(inner_relids, cur_relids);
+               i++;
+       }
+
+       return inner_relids;
+}
+
+/*
+ * Arrange for the given plan node to be treated as a query feature when the
+ * tree walk reaches it.
+ *
+ * Make sure to only use this for nodes that the tree walk can't have reached
+ * yet!
+ */
+void
+pgpa_add_future_feature(pgpa_plan_walker_context *walker,
+                                               pgpa_qf_type type, Plan *plan)
+{
+       pgpa_query_feature *feature;
+
+       /* Create the feature, then queue it on the future-features list. */
+       feature = pgpa_add_feature(walker, type, plan);
+       walker->future_query_features =
+               lappend(walker->future_query_features, feature);
+}
+
+/*
+ * Return the last of any elided nodes associated with this plan node ID.
+ *
+ * The last elided node is the one that would have been uppermost in the plan
+ * tree had it not been removed during setrefs processing.
+ */
+ElidedNode *
+pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan)
+{
+       ElidedNode *last_match = NULL;
+       ListCell   *lc;
+
+       /* Later matches overwrite earlier ones, so the final match wins. */
+       foreach(lc, pstmt->elidedNodes)
+       {
+               ElidedNode *candidate = lfirst_node(ElidedNode, lc);
+
+               if (candidate->plan_node_id == plan->plan_node_id)
+                       last_match = candidate;
+       }
+
+       return last_match;
+}
+
+/*
+ * Certain plan nodes can refer to a set of RTIs. Extract and return the set.
+ */
+Bitmapset *
+pgpa_relids(Plan *plan)
+{
+       /* Only these four node types carry a relid set; others yield NULL. */
+       switch (nodeTag(plan))
+       {
+               case T_Result:
+                       return ((Result *) plan)->relids;
+               case T_ForeignScan:
+                       return ((ForeignScan *) plan)->fs_relids;
+               case T_Append:
+                       return ((Append *) plan)->apprelids;
+               case T_MergeAppend:
+                       return ((MergeAppend *) plan)->apprelids;
+               default:
+                       return NULL;
+       }
+}
+
+/*
+ * Extract the scanned RTI from a plan node.
+ *
+ * Returns 0 if there isn't one.
+ */
+Index
+pgpa_scanrelid(Plan *plan)
+{
+       Index           scanrelid = 0;  /* result for node types not listed below */
+
+       switch (nodeTag(plan))
+       {
+               case T_SeqScan:
+               case T_SampleScan:
+               case T_IndexScan:
+               case T_IndexOnlyScan:
+               case T_BitmapHeapScan:
+               case T_TidScan:
+               case T_TidRangeScan:
+               case T_SubqueryScan:
+               case T_FunctionScan:
+               case T_TableFuncScan:
+               case T_ValuesScan:
+               case T_CteScan:
+               case T_NamedTuplestoreScan:
+               case T_WorkTableScan:
+               case T_ForeignScan:
+               case T_CustomScan:
+                       /* All of the above are read through a cast to Scan. */
+                       scanrelid = ((Scan *) plan)->scanrelid;
+                       break;
+               default:
+                       break;
+       }
+
+       return scanrelid;
+}
+
+/*
+ * Create a pgpa_query_feature and add it to the list of all query features
+ * for this plan.
+ */
+static pgpa_query_feature *
+pgpa_add_feature(pgpa_plan_walker_context *walker,
+                                pgpa_qf_type type, Plan *plan)
+{
+       pgpa_query_feature *feature;
+
+       /* Zero-filled allocation; only the type and plan are filled in here. */
+       feature = palloc0_object(pgpa_query_feature);
+       feature->type = type;
+       feature->plan = plan;
+
+       /* Append it to the walker's list for this feature type. */
+       walker->query_features[type] =
+               lappend(walker->query_features[type], feature);
+
+       return feature;
+}
+
+/*
+ * Add a single RTI to each active query feature.
+ */
+static void
+pgpa_qf_add_rti(List *active_query_features, Index rti)
+{
+       ListCell   *lc;
+
+       /* Record the RTI in the relid set of every active feature. */
+       foreach(lc, active_query_features)
+       {
+               pgpa_query_feature *feature = lfirst(lc);
+
+               feature->relids = bms_add_member(feature->relids, rti);
+       }
+}
+
+/*
+ * Add a set of RTIs to each active query feature.
+ */
+static void
+pgpa_qf_add_rtis(List *active_query_features, Bitmapset *relids)
+{
+       ListCell   *lc;
+
+       /* Merge the whole set into the relid set of every active feature. */
+       foreach(lc, active_query_features)
+       {
+               pgpa_query_feature *feature = lfirst(lc);
+
+               feature->relids = bms_add_members(feature->relids, relids);
+       }
+}
+
+/*
+ * Add RTIs directly contained in a plan node to each active query feature.
+ */
+static void
+pgpa_qf_add_plan_rtis(List *active_query_features, Plan *plan)
+{
+       Bitmapset  *plan_relids = pgpa_relids(plan);
+       Index           scan_rti;
+
+       /* A relid set, when the node has one, is used instead of scanrelid. */
+       if (plan_relids != NULL)
+       {
+               pgpa_qf_add_rtis(active_query_features, plan_relids);
+               return;
+       }
+
+       /* Otherwise fall back to the single scanned RTI; zero means none. */
+       scan_rti = pgpa_scanrelid(plan);
+       if (scan_rti != 0)
+               pgpa_qf_add_rti(active_query_features, scan_rti);
+}
+
+/*
+ * If we generated plan advice using the provided walker object and array
+ * of identifiers, would we generate the specified tag/target combination?
+ *
+ * If yes, the plan conforms to the advice; if no, it does not. Note that
+ * we have no way of knowing whether the planner was forced to emit a plan
+ * that conformed to the advice or just happened to do so.
+ */
+bool
+pgpa_walker_would_advise(pgpa_plan_walker_context *walker,
+                                                pgpa_identifier *rt_identifiers,
+                                                pgpa_advice_tag_type tag,
+                                                pgpa_advice_target *target)
+{
+       Index           rtable_length = list_length(walker->pstmt->rtable);
+       Bitmapset  *relids = NULL;
+
+       if (tag == PGPA_TAG_JOIN_ORDER)
+       {
+               foreach_ptr(pgpa_unrolled_join, ujoin, walker->toplevel_unrolled_joins)
+               {
+                       if (pgpa_walker_join_order_matches(ujoin, rtable_length,
+                                                                                          rt_identifiers, target, true))
+                               return true;
+               }
+
+               return false;
+       }
+
+       if (target->ttype == PGPA_TARGET_IDENTIFIER)
+       {
+               Index           rti;
+
+               rti = pgpa_walker_get_rti(rtable_length, rt_identifiers, &target->rid);
+               relids = bms_make_singleton(rti);
+       }
+       else
+       {
+               Assert(target->ttype == PGPA_TARGET_ORDERED_LIST);
+               foreach_ptr(pgpa_advice_target, child_target, target->children)
+               {
+                       Index           rti;
+
+                       Assert(child_target->ttype == PGPA_TARGET_IDENTIFIER);
+                       rti = pgpa_compute_rti_from_identifier(rtable_length,
+                                                                                                  rt_identifiers,
+                                                                                                  &child_target->rid);
+                       if (rti == 0)
+                               elog(ERROR, "cannot determine RTI for advice target");
+                       relids = bms_add_member(relids, rti);
+               }
+       }
+
+       switch (tag)
+       {
+               case PGPA_TAG_JOIN_ORDER:
+                       /* should have been handled above */
+                       pg_unreachable();
+                       break;
+               case PGPA_TAG_BITMAP_HEAP_SCAN:
+                       return pgpa_walker_contains_scan(walker,
+                                                                                        PGPA_SCAN_BITMAP_HEAP,
+                                                                                        relids);
+               case PGPA_TAG_FOREIGN_JOIN:
+                       return pgpa_walker_contains_scan(walker,
+                                                                                        PGPA_SCAN_FOREIGN,
+                                                                                        relids);
+               case PGPA_TAG_INDEX_ONLY_SCAN:
+                       return pgpa_walker_contains_scan(walker,
+                                                                                        PGPA_SCAN_INDEX_ONLY,
+                                                                                        relids);
+               case PGPA_TAG_INDEX_SCAN:
+                       return pgpa_walker_contains_scan(walker,
+                                                                                        PGPA_SCAN_INDEX,
+                                                                                        relids);
+               case PGPA_TAG_PARTITIONWISE:
+                       return pgpa_walker_contains_scan(walker,
+                                                                                        PGPA_SCAN_PARTITIONWISE,
+                                                                                        relids);
+               case PGPA_TAG_SEQ_SCAN:
+                       return pgpa_walker_contains_scan(walker,
+                                                                                        PGPA_SCAN_SEQ,
+                                                                                        relids);
+               case PGPA_TAG_TID_SCAN:
+                       return pgpa_walker_contains_scan(walker,
+                                                                                        PGPA_SCAN_TID,
+                                                                                        relids);
+               case PGPA_TAG_GATHER:
+                       return pgpa_walker_contains_feature(walker,
+                                                                                               PGPAQF_GATHER,
+                                                                                               relids);
+               case PGPA_TAG_GATHER_MERGE:
+                       return pgpa_walker_contains_feature(walker,
+                                                                                               PGPAQF_GATHER_MERGE,
+                                                                                               relids);
+               case PGPA_TAG_SEMIJOIN_NON_UNIQUE:
+                       return pgpa_walker_contains_feature(walker,
+                                                                                               PGPAQF_SEMIJOIN_NON_UNIQUE,
+                                                                                               relids);
+               case PGPA_TAG_SEMIJOIN_UNIQUE:
+                       return pgpa_walker_contains_feature(walker,
+                                                                                               PGPAQF_SEMIJOIN_UNIQUE,
+                                                                                               relids);
+               case PGPA_TAG_HASH_JOIN:
+                       return pgpa_walker_contains_join(walker,
+                                                                                        JSTRAT_HASH_JOIN,
+                                                                                        relids);
+               case PGPA_TAG_MERGE_JOIN_MATERIALIZE:
+                       return pgpa_walker_contains_join(walker,
+                                                                                        JSTRAT_MERGE_JOIN_MATERIALIZE,
+                                                                                        relids);
+               case PGPA_TAG_MERGE_JOIN_PLAIN:
+                       return pgpa_walker_contains_join(walker,
+                                                                                        JSTRAT_MERGE_JOIN_PLAIN,
+                                                                                        relids);
+               case PGPA_TAG_NESTED_LOOP_MATERIALIZE:
+                       return pgpa_walker_contains_join(walker,
+                                                                                        JSTRAT_NESTED_LOOP_MATERIALIZE,
+                                                                                        relids);
+               case PGPA_TAG_NESTED_LOOP_MEMOIZE:
+                       return pgpa_walker_contains_join(walker,
+                                                                                        JSTRAT_NESTED_LOOP_MEMOIZE,
+                                                                                        relids);
+               case PGPA_TAG_NESTED_LOOP_PLAIN:
+                       return pgpa_walker_contains_join(walker,
+                                                                                        JSTRAT_NESTED_LOOP_PLAIN,
+                                                                                        relids);
+               case PGPA_TAG_NO_GATHER:
+                       return pgpa_walker_contains_no_gather(walker, relids);
+       }
+
+       /* should not get here */
+       return false;
+}
+
+/*
+ * Does an unrolled join match an ordered-list advice target's join order?
+ */
+static bool
+pgpa_walker_join_order_matches(pgpa_unrolled_join *ujoin,
+                                                          Index rtable_length,
+                                                          pgpa_identifier *rt_identifiers,
+                                                          pgpa_advice_target *target,
+                                                          bool toplevel)
+{
+       int             nchildren = list_length(target->children);
+
+       Assert(target->ttype == PGPA_TARGET_ORDERED_LIST);
+
+       /* Toplevel may match just a prefix; nested lists must match fully. */
+       if (toplevel)
+       {
+               if (nchildren > ujoin->ninner + 1)
+                       return false;
+       }
+       else
+       {
+               if (nchildren != ujoin->ninner + 1)
+                       return false;
+       }
+
+       /* Outer member must match first child; presumes nchildren >= 1. */
+       if (!pgpa_walker_join_order_matches_member(&ujoin->outer,
+                                                                                          rtable_length,
+                                                                                          rt_identifiers,
+                                                                                          linitial(target->children)))
+               return false;
+
+       /* Inner member n must match target child n + 1, in order. */
+       for (int n = 0; n < nchildren - 1; ++n)
+       {
+               pgpa_advice_target *child_target = list_nth(target->children, n + 1);
+
+               if (!pgpa_walker_join_order_matches_member(&ujoin->inner[n],
+                                                                                                  rtable_length,
+                                                                                                  rt_identifiers,
+                                                                                                  child_target))
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Does one join member (a scan or a nested unrolled join) match the target?
+ */
+static bool
+pgpa_walker_join_order_matches_member(pgpa_join_member *member,
+                                                                         Index rtable_length,
+                                                                         pgpa_identifier *rt_identifiers,
+                                                                         pgpa_advice_target *target)
+{
+       Bitmapset  *relids = NULL;
+
+       if (member->unrolled_join != NULL)
+       {
+               if (target->ttype != PGPA_TARGET_ORDERED_LIST)
+                       return false;
+               return pgpa_walker_join_order_matches(member->unrolled_join,
+                                                                                         rtable_length,
+                                                                                         rt_identifiers,
+                                                                                         target,
+                                                                                         false);
+       }
+
+       Assert(member->scan != NULL);
+       switch (target->ttype)
+       {
+               case PGPA_TARGET_ORDERED_LIST:
+                       /* An ordered list can only match an unrolled join, not a scan. */
+                       return false;
+
+               case PGPA_TARGET_UNORDERED_LIST:
+                       {
+                               foreach_ptr(pgpa_advice_target, child_target, target->children)
+                               {
+                                       Index           rti;
+
+                                       rti = pgpa_walker_get_rti(rtable_length, rt_identifiers,
+                                                                                         &child_target->rid);
+                                       relids = bms_add_member(relids, rti);
+                               }
+                               break;
+                       }
+
+               case PGPA_TARGET_IDENTIFIER:
+                       {
+                               Index           rti;
+
+                               rti = pgpa_walker_get_rti(rtable_length, rt_identifiers,
+                                                                                 &target->rid);
+                               relids = bms_make_singleton(rti);
+                               break;
+                       }
+       }
+
+       return bms_equal(member->scan->relids, relids);
+}
+
+/*
+ * Did the walker record a scan using the given strategy whose relid set is
+ * exactly equal to the given relid set?
+ */
+static bool
+pgpa_walker_contains_scan(pgpa_plan_walker_context *walker,
+                                                 pgpa_scan_strategy strategy,
+                                                 Bitmapset *relids)
+{
+       List       *scans = walker->scans[strategy];
+
+       foreach_ptr(pgpa_scan, scan, scans)
+       {
+               /*
+                * XXX. If this is index-related advice, we should also validate that
+                * the advice target's index target matches the Plan tree.
+                */
+               if (bms_equal(scan->relids, relids))
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * Did the walker record a query feature of the given type whose relid set
+ * is exactly equal to the given relid set?
+ */
+static bool
+pgpa_walker_contains_feature(pgpa_plan_walker_context *walker,
+                                                        pgpa_qf_type type,
+                                                        Bitmapset *relids)
+{
+       List       *query_features = walker->query_features[type];
+
+       foreach_ptr(pgpa_query_feature, qf, query_features)
+       {
+               if (bms_equal(qf->relids, relids))
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * Did the walker record a join using the given strategy whose inner side
+ * has exactly the given relid set?
+ */
+static bool
+pgpa_walker_contains_join(pgpa_plan_walker_context *walker,
+                                                 pgpa_join_strategy strategy,
+                                                 Bitmapset *relids)
+{
+       List       *join_strategies = walker->join_strategies[strategy];
+
+       foreach_ptr(Bitmapset, jsrelids, join_strategies)
+       {
+               if (bms_equal(jsrelids, relids))
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * Is every one of the given relids in the walker's set of NO_GATHER scans?
+ */
+static bool
+pgpa_walker_contains_no_gather(pgpa_plan_walker_context *walker,
+                                                          Bitmapset *relids)
+{
+       return bms_is_subset(relids, walker->no_gather_scans);
+}
+
+/*
+ * Convert a relation identifier to an RTI; a result of zero means failure.
+ *
+ * We throw an error here because we expect this to be used on system-generated
+ * advice. Hence, failure here indicates an advice generation bug.
+ */
+static Index
+pgpa_walker_get_rti(Index rtable_length,
+                                       pgpa_identifier *rt_identifiers,
+                                       pgpa_identifier *rid)
+{
+       Index           rti;
+
+       rti = pgpa_compute_rti_from_identifier(rtable_length,
+                                                                                  rt_identifiers,
+                                                                                  rid);
+       if (rti == 0)
+               elog(ERROR, "cannot determine RTI for advice target");
+       return rti;
+}
diff --git a/contrib/pg_plan_advice/pgpa_walker.h b/contrib/pg_plan_advice/pgpa_walker.h

new file mode 100644 (file)

index 0000000..d6584c0
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_walker.h
@@ -0,0 +1,121 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_walker.h
+ *       Plan tree iteration
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_walker.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_WALKER_H
+#define PGPA_WALKER_H
+
+#include "pgpa_ast.h"
+#include "pgpa_join.h"
+#include "pgpa_scan.h"
+
+/*
+ * We use the term "query feature" to refer to plan nodes that are interesting
+ * in the following way: to generate advice, we'll need to know the set of
+ * same-subquery, non-join RTIs occurring at or below that plan node, without
+ * admixture of parent and child RTIs.
+ *
+ * For example, Gather nodes, designated by PGPAQF_GATHER, and Gather Merge
+ * nodes, designated by PGPAQF_GATHER_MERGE, are query features, because we'll
+ * want to admit some kind of advice that describes the portion of the plan
+ * tree that appears beneath those nodes.
+ *
+ * Each semijoin can be implemented either by directly performing a semijoin,
+ * or by making one side unique and then performing a normal join. Either way,
+ * we use a query feature to notice what decision was made, so that we can
+ * describe it by enumerating the RTIs on that side of the join.
+ *
+ * To elaborate on the "no admixture of parent and child RTIs" rule, in all of
+ * these cases, if the entirety of an inheritance hierarchy appears beneath
+ * the query feature, we only want to name the parent table. But it's also
+ * possible to have cases where we must name child tables. This is particularly
+ * likely to happen when partitionwise join is in use, but could happen for
+ * Gather or Gather Merge even without that, if one of those appears below
+ * an Append or MergeAppend node for a single table.
+ */
+typedef enum pgpa_qf_type
+{
+       PGPAQF_GATHER,
+       PGPAQF_GATHER_MERGE,
+       PGPAQF_SEMIJOIN_NON_UNIQUE,
+       PGPAQF_SEMIJOIN_UNIQUE
+       /* update NUM_PGPA_QF_TYPES if you add anything here */
+} pgpa_qf_type;
+
+#define NUM_PGPA_QF_TYPES ((int) PGPAQF_SEMIJOIN_UNIQUE + 1)
+
+/*
+ * For each query feature, we keep track of the feature type, the relevant
+ * plan node, and the set of relids that we found underneath that node. See
+ * the comments on pgpa_qf_type, above, for additional details.
+ */
+typedef struct pgpa_query_feature
+{
+       pgpa_qf_type type;
+       Plan       *plan;
+       Bitmapset  *relids;
+} pgpa_query_feature;
+
+/*
+ * Context object for plan tree walk.
+ *
+ * pstmt is the PlannedStmt we're studying.
+ *
+ * scans is an array of lists of pgpa_scan objects. The array is indexed by
+ * the scan's pgpa_scan_strategy.
+ *
+ * no_gather_scans is the set of scan RTIs that do not appear beneath any
+ * Gather or Gather Merge node.
+ *
+ * toplevel_unrolled_joins is a list of all pgpa_unrolled_join objects that
+ * are not a child of some other pgpa_unrolled_join.
+ *
+ * join_strategies is an array of lists of Bitmapset objects. Each Bitmapset
+ * is the set of relids that appears on the inner side of some join (excluding
+ * RTIs from partition children and subqueries). The array is indexed by
+ * pgpa_join_strategy.
+ *
+ * query_features is an array of lists of pgpa_query_feature objects, indexed
+ * by pgpa_qf_type.
+ *
+ * future_query_features is only used during the plan tree walk and should
+ * be empty when the tree walk concludes. It is a list of pgpa_query_feature
+ * objects for Plan nodes that the plan tree walk has not yet encountered;
+ * when encountered, they will be moved to the list of active query features
+ * that is propagated via the call stack.
+ */
+typedef struct pgpa_plan_walker_context
+{
+       PlannedStmt *pstmt;
+       List       *scans[NUM_PGPA_SCAN_STRATEGY];
+       Bitmapset  *no_gather_scans;
+       List       *toplevel_unrolled_joins;
+       List       *join_strategies[NUM_PGPA_JOIN_STRATEGY];
+       List       *query_features[NUM_PGPA_QF_TYPES];
+       List       *future_query_features;
+} pgpa_plan_walker_context;
+
+extern void pgpa_plan_walker(pgpa_plan_walker_context *walker,
+                                                        PlannedStmt *pstmt);
+
+extern void pgpa_add_future_feature(pgpa_plan_walker_context *walker,
+                                                                       pgpa_qf_type type,
+                                                                       Plan *plan);
+
+extern ElidedNode *pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan);
+extern Bitmapset *pgpa_relids(Plan *plan);
+extern Index pgpa_scanrelid(Plan *plan);
+
+extern bool pgpa_walker_would_advise(pgpa_plan_walker_context *walker,
+                                                                        pgpa_identifier *rt_identifiers,
+                                                                        pgpa_advice_tag_type tag,
+                                                                        pgpa_advice_target *target);
+
+#endif
diff --git a/contrib/pg_plan_advice/sql/gather.sql b/contrib/pg_plan_advice/sql/gather.sql

new file mode 100644 (file)

index 0000000..5828004
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/gather.sql
@@ -0,0 +1,76 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 1;
+SET parallel_setup_cost = 0;
+SET parallel_tuple_cost = 0;
+SET min_parallel_table_scan_size = 0;
+SET debug_parallel_query = off;
+
+CREATE TABLE gt_dim (id serial primary key, dim text)
+       WITH (autovacuum_enabled = false);
+INSERT INTO gt_dim (dim) SELECT random()::text FROM generate_series(1,100) g;
+VACUUM ANALYZE gt_dim;
+
+CREATE TABLE gt_fact (
+       id int not null,
+       dim_id integer not null references gt_dim (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO gt_fact
+       SELECT g, (g%3)+1 FROM generate_series(1,100000) g;
+VACUUM ANALYZE gt_fact;
+
+-- By default, we expect Gather Merge with a parallel hash join.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+
+-- Force Gather or Gather Merge of both relations together.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+COMMIT;
+
+-- Force a separate Gather or Gather Merge operation for each relation.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+COMMIT;
+
+-- Force a Gather or Gather Merge on one relation but no parallelism on other.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(f) no_gather(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(d) no_gather(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather(f) no_gather(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather(d) no_gather(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+COMMIT;
+
+-- Force no Gather or Gather Merge use at all.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'no_gather(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+COMMIT;
+
+-- Can't force Gather Merge without the ORDER BY clause, but just Gather is OK.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'gather((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id;
+COMMIT;
diff --git a/contrib/pg_plan_advice/sql/join_order.sql b/contrib/pg_plan_advice/sql/join_order.sql

new file mode 100644 (file)

index 0000000..5aa2fc6
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/join_order.sql
@@ -0,0 +1,96 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+
+CREATE TABLE jo_dim1 (id integer primary key, dim1 text, val1 int)
+       WITH (autovacuum_enabled = false);
+INSERT INTO jo_dim1 (id, dim1, val1)
+       SELECT g, 'some filler text ' || g, (g % 3) + 1
+         FROM generate_series(1,100) g;
+VACUUM ANALYZE jo_dim1;
+CREATE TABLE jo_dim2 (id integer primary key, dim2 text, val2 int)
+       WITH (autovacuum_enabled = false);
+INSERT INTO jo_dim2 (id, dim2, val2)
+       SELECT g, 'some filler text ' || g, (g % 7) + 1
+         FROM generate_series(1,1000) g;
+VACUUM ANALYZE jo_dim2;
+
+CREATE TABLE jo_fact (
+       id int primary key,
+       dim1_id integer not null references jo_dim1 (id),
+       dim2_id integer not null references jo_dim2 (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO jo_fact
+       SELECT g, (g%100)+1, (g%100)+1 FROM generate_series(1,100000) g;
+VACUUM ANALYZE jo_fact;
+
+-- We expect to join to d2 first and then d1, since the condition on d2
+-- is more selective.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+
+-- Force a few different join orders. Some of these are very inefficient,
+-- but the planner considers them all viable.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+SET LOCAL pg_plan_advice.advice = 'join_order(d1 f d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+SET LOCAL pg_plan_advice.advice = 'join_order(f (d1 d2))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+COMMIT;
+
+-- The unusual formulation of this query is intended to prevent the query
+-- planner from reducing the FULL JOIN to some other join type, so that we
+-- can test what happens with a join type that cannot be reordered.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+
+-- We should not be able to force the planner to join f to d1 first, because
+-- that is not a valid join order, but we should be able to force the planner
+-- to make either d2 or f the driving table.
+BEGIN;
+-- XXX: The advice feedback says 'partially matched' here which isn't exactly
+-- wrong given the way that flag is handled in the code, but it's at the very
+-- least confusing. Something should probably be improved here.
+SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+SET LOCAL pg_plan_advice.advice = 'join_order(d2 f d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+COMMIT;
+
+-- XXX: add tests for join order prefix matching
+-- XXX: join_order(justonerel) shouldn't report partially matched
diff --git a/contrib/pg_plan_advice/sql/join_strategy.sql b/contrib/pg_plan_advice/sql/join_strategy.sql

new file mode 100644 (file)

index 0000000..8eb823f
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/join_strategy.sql
@@ -0,0 +1,76 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+
+CREATE TABLE join_dim (id serial primary key, dim text)
+       WITH (autovacuum_enabled = false);
+INSERT INTO join_dim (dim) SELECT random()::text FROM generate_series(1,100) g;
+VACUUM ANALYZE join_dim;
+
+CREATE TABLE join_fact (
+       id int primary key,
+       dim_id integer not null references join_dim (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO join_fact
+       SELECT g, (g%3)+1 FROM generate_series(1,100000) g;
+CREATE INDEX join_fact_dim_id ON join_fact (dim_id);
+VACUUM ANALYZE join_fact;
+
+-- We expect a hash join by default.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+
+-- Try forcing each join method in turn with join_dim as the inner table.
+-- All of these should work except for MERGE_JOIN_MATERIALIZE; that will
+-- fail, because the planner knows that join_dim (id) is unique, and will
+-- refuse to add mark/restore overhead.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+COMMIT;
+
+-- Now try forcing each join method in turn with join_fact as the inner
+-- table. All of these should work.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+COMMIT;
+
+-- We can't force a foreign join between these tables, because they
+-- aren't foreign tables.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'FOREIGN_JOIN((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+COMMIT;
diff --git a/contrib/pg_plan_advice/sql/local_collector.sql b/contrib/pg_plan_advice/sql/local_collector.sql

new file mode 100644 (file)

index 0000000..be14539
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/local_collector.sql
@@ -0,0 +1,40 @@
+CREATE EXTENSION pg_plan_advice;
+
+-- Try clearing advice before we've collected any.
+SELECT pg_clear_collected_local_advice();
+
+-- Set a small advice collection limit so that we'll exceed it.
+SET pg_plan_advice.local_collection_limit = 2;
+
+-- Set up a dummy table.
+CREATE TABLE dummy_table (a int primary key, b text)
+       WITH (autovacuum_enabled = false, parallel_workers = 0);
+
+-- Test queries.
+SELECT * FROM dummy_table a, dummy_table b;
+SELECT * FROM dummy_table;
+
+-- Should return the advice from the second test query.
+SELECT advice FROM pg_get_collected_local_advice() ORDER BY id LIMIT 1;
+
+-- Now try clearing advice again.
+SELECT pg_clear_collected_local_advice();
+
+-- Raise the collection limit so that the collector uses multiple chunks.
+SET pg_plan_advice.local_collection_limit = 2000;
+
+-- Push a bunch of queries through the collector.
+DO $$
+BEGIN
+       FOR x IN 1..2000 LOOP
+               EXECUTE 'SELECT * FROM dummy_table';
+       END LOOP;
+END
+$$;
+
+-- Check that the collector worked.
+SELECT COUNT(*) FROM pg_get_collected_local_advice();
+
+-- And clear one more time, to verify that this doesn't cause a problem
+-- even with a larger number of entries.
+SELECT pg_clear_collected_local_advice();
diff --git a/contrib/pg_plan_advice/sql/partitionwise.sql b/contrib/pg_plan_advice/sql/partitionwise.sql

new file mode 100644 (file)

index 0000000..e42c061
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/partitionwise.sql
@@ -0,0 +1,78 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+SET enable_partitionwise_join = true;
+
+CREATE TABLE pt1 (id integer primary key, dim1 text, val1 int)
+       PARTITION BY RANGE (id);
+CREATE TABLE pt1a PARTITION OF pt1 FOR VALUES FROM (1) to (1001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt1b PARTITION OF pt1 FOR VALUES FROM (1001) to (2001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt1c PARTITION OF pt1 FOR VALUES FROM (2001) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO pt1 (id, dim1, val1)
+       SELECT g, 'some filler text ' || g, (g % 3) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE pt1;
+
+CREATE TABLE pt2 (id integer primary key, dim2 text, val2 int)
+       PARTITION BY RANGE (id);
+CREATE TABLE pt2a PARTITION OF pt2 FOR VALUES FROM (1) to (1001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt2b PARTITION OF pt2 FOR VALUES FROM (1001) to (2001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt2c PARTITION OF pt2 FOR VALUES FROM (2001) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO pt2 (id, dim2, val2)
+       SELECT g, 'some other text ' || g, (g % 5) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE pt2;
+
+CREATE TABLE pt3 (id integer primary key, dim3 text, val3 int)
+       PARTITION BY RANGE (id);
+CREATE TABLE pt3a PARTITION OF pt3 FOR VALUES FROM (1) to (1001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt3b PARTITION OF pt3 FOR VALUES FROM (1001) to (2001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt3c PARTITION OF pt3 FOR VALUES FROM (2001) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO pt3 (id, dim3, val3)
+       SELECT g, 'a third random text ' || g, (g % 7) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE pt3;
+
+CREATE TABLE ptmismatch (id integer primary key, dimm text, valm int)
+       PARTITION BY RANGE (id);
+CREATE TABLE ptmismatcha PARTITION OF ptmismatch
+    FOR VALUES FROM (1) to (1501)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE ptmismatchb PARTITION OF ptmismatch
+    FOR VALUES FROM (1501) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO ptmismatch (id, dimm, valm)
+       SELECT g, 'yet another text ' || g, (g % 2) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE ptmismatch;
+
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+   AND val1 = 1 AND val2 = 1 AND val3 = 1;
+
+-- Suppress partitionwise join, or do it just partially.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE(pt1 pt2 pt3)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+   AND val1 = 1 AND val2 = 1 AND val3 = 1;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 pt2) pt3)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+   AND val1 = 1 AND val2 = 1 AND val3 = 1;
+COMMIT;
+
+-- Can't force a partitionwise join with a mismatched table.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 ptmismatch))';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, ptmismatch WHERE pt1.id = ptmismatch.id;
+COMMIT;
diff --git a/contrib/pg_plan_advice/sql/scan.sql b/contrib/pg_plan_advice/sql/scan.sql

new file mode 100644 (file)

index 0000000..25416a7
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/scan.sql
@@ -0,0 +1,195 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+SET seq_page_cost = 0.1;
+SET random_page_cost = 0.1;
+SET cpu_tuple_cost = 0;
+SET cpu_index_tuple_cost = 0;
+
+CREATE TABLE scan_table (a int primary key, b text)
+       WITH (autovacuum_enabled = false);
+INSERT INTO scan_table
+       SELECT g, 'some text ' || g FROM generate_series(1, 100000) g;
+CREATE INDEX scan_table_b ON scan_table USING brin (b);
+VACUUM ANALYZE scan_table;
+
+-- Sequential scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+
+-- Index scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+
+-- Index-only scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+
+-- Bitmap heap scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE b > 'some text 8';
+
+-- TID scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+
+-- TID range scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+
+-- Try forcing each of our test queries to use the scan type they
+-- wanted to use anyway. This should succeed.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE b > 'some text 8';
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+COMMIT;
+
+-- Try to force a full scan of the table to use some other scan type. All
+-- of these will fail. An index scan or bitmap heap scan could potentially
+-- generate the correct answer, but the planner does not even consider these
+-- possibilities due to the lack of a WHERE clause.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+COMMIT;
+
+-- Try again to force index use. This should now succeed for the INDEX_SCAN
+-- and BITMAP_HEAP_SCAN, but the INDEX_ONLY_SCAN can't be forced because the
+-- query fetches columns not included in the index.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+COMMIT;
+
+-- We can force a primary key lookup to use a sequential scan, but we
+-- can't force it to use an index-only scan (due to the column list)
+-- or a TID scan (due to the absence of a TID qual).
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+COMMIT;
+
+-- We can forcibly downgrade an index-only scan to an index scan, but we can't
+-- force the use of an index that the planner thinks is inapplicable.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+COMMIT;
+
+-- We can force the use of a sequential scan in place of a bitmap heap scan,
+-- but a plain index scan on a BRIN index is not possible.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE b > 'some text 8';
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+COMMIT;
+
+-- We can force the use of a sequential scan rather than a TID scan or
+-- TID range scan.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+COMMIT;
+
+-- Test more complex scenarios with index scans.
+BEGIN;
+-- Should still work if we mention the schema.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+-- But not if we mention the wrong schema.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table cilbup.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+-- It's OK to repeat the same advice.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+-- But it doesn't work if the index target is even notionally different.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+COMMIT;
+
+-- Test assorted incorrect advice.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(nothing)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(nothing whatsoever)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table bogus)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(nothing whatsoever)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table bogus)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+COMMIT;
+
+-- Test our ability to refer to multiple instances of the same alias.
+BEGIN;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s#2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s) SEQ_SCAN(s#2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+COMMIT;
+
+-- Test our ability to refer to scans within a subquery.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+BEGIN;
+-- Should not match.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+-- Should match first query only.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@x)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+-- Should match second query only.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@unnamed_subquery)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+COMMIT;
diff --git a/contrib/pg_plan_advice/sql/syntax.sql b/contrib/pg_plan_advice/sql/syntax.sql

new file mode 100644 (file)

index 0000000..8bc1b71
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/syntax.sql
@@ -0,0 +1,42 @@
+LOAD 'pg_plan_advice';
+
+-- An empty string is allowed, and so is an empty target list.
+SET pg_plan_advice.advice = '';
+SET pg_plan_advice.advice = 'SEQ_SCAN()';
+
+-- Test assorted variations in capitalization, whitespace, and which parts of
+-- the relation identifier are included. These should all work.
+SET pg_plan_advice.advice = 'SEQ_SCAN(x)';
+SET pg_plan_advice.advice = 'seq_scan(x@y)';
+SET pg_plan_advice.advice = 'SEQ_scan(x#2)';
+SET pg_plan_advice.advice = 'SEQ_SCAN (x/y)';
+SET pg_plan_advice.advice = '  SEQ_SCAN ( x / y . z )  ';
+SET pg_plan_advice.advice = 'SEQ_SCAN("x"#2/"y"."z"@"t")';
+
+-- Syntax errors.
+SET pg_plan_advice.advice = 'SEQUENTIAL_SCAN(x)';
+SET pg_plan_advice.advice = 'SEQ_SCAN';
+SET pg_plan_advice.advice = 'SEQ_SCAN(';
+SET pg_plan_advice.advice = 'SEQ_SCAN("';
+SET pg_plan_advice.advice = 'SEQ_SCAN(#';
+SET pg_plan_advice.advice = '()';
+SET pg_plan_advice.advice = '123';
+
+-- Legal comments.
+SET pg_plan_advice.advice = '/**/';
+SET pg_plan_advice.advice = 'HASH_JOIN(_)/***/';
+SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(/*x*/y)';
+SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(y//*x*/z)';
+
+-- Unterminated comments.
+SET pg_plan_advice.advice = '/*';
+SET pg_plan_advice.advice = 'JOIN_ORDER("fOO") /* oops';
+
+-- Nested comments are not supported, so the first of these is legal and
+-- the second is not.
+SET pg_plan_advice.advice = '/*/*/';
+SET pg_plan_advice.advice = '/*/* stuff */*/';
+
+-- Foreign join requires multiple relation identifiers.
+SET pg_plan_advice.advice = 'FOREIGN_JOIN(a)';
+SET pg_plan_advice.advice = 'FOREIGN_JOIN((a))';
diff --git a/contrib/pg_plan_advice/t/001_regress.pl b/contrib/pg_plan_advice/t/001_regress.pl

new file mode 100644 (file)

index 0000000..dffafca
--- /dev/null
+++ b/contrib/pg_plan_advice/t/001_regress.pl
@@ -0,0 +1,139 @@
+# Copyright (c) 2021-2025, PostgreSQL Global Development Group
+
+# Run the core regression tests under pg_plan_advice to check for problems.
+use strict;
+use warnings FATAL => 'all';
+
+use Cwd            qw(abs_path);
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Initialize the single test node
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init();
+
+# Set up our desired configuration.
+#
+# We run with pg_plan_advice.shared_collection_limit set to ensure that the
+# plan tree walker code runs against every query in the regression tests. If
+# we're unable to properly analyze any of those plan trees, this test should fail.
+#
+# We set pg_plan_advice.advice to an advice string that will cause the advice
+# trove to be populated with a few entries of various sorts, but which we do
+# not expect to match anything in the regression test queries. This way, the
+# planner hooks will be called, improving code coverage, but no plans should
+# actually change.
+#
+# pg_plan_advice.always_explain_supplied_advice=false is needed to avoid breaking
+# regression test queries that use EXPLAIN. In the real world, it seems like
+# users will want EXPLAIN output to show supplied advice so that it's clear
+# whether normal planner behavior has been altered, but here that's undesirable.
+$node->append_conf('postgresql.conf', <<EOM);
+pg_plan_advice.shared_collection_limit=1000000
+shared_preload_libraries=pg_plan_advice
+pg_plan_advice.advice='SEQ_SCAN(entirely_fictitious) HASH_JOIN(total_fabrication) GATHER(completely_imaginary)'
+pg_plan_advice.always_explain_supplied_advice=false
+EOM
+$node->start;
+
+my $srcdir = abs_path("../..");
+
+# --outputdir points to the path where to place the output files.
+my $outputdir = $PostgreSQL::Test::Utils::tmp_check;
+
+# --inputdir points to the path of the input files.
+my $inputdir = "$srcdir/src/test/regress";
+
+# Run the tests.
+my $rc =
+  system($ENV{PG_REGRESS} . " "
+         . "--bindir= "
+         . "--host=" . $node->host . " "
+         . "--port=" . $node->port . " "
+         . "--schedule=$srcdir/src/test/regress/parallel_schedule "
+         . "--max-concurrent-tests=20 "
+         . "--inputdir=\"$inputdir\" "
+         . "--outputdir=\"$outputdir\"");
+
+# Dump out the regression diffs file, if there is one
+if ($rc != 0)
+{
+       my $diffs = "$outputdir/regression.diffs";
+       if (-e $diffs)
+       {
+               print "=== dumping $diffs ===\n";
+               print slurp_file($diffs);
+               print "=== EOF ===\n";
+       }
+}
+
+# Report results
+is($rc, 0, 'regression tests pass');
+
+# Create the extension so we can access the collector
+$node->safe_psql('postgres', 'CREATE EXTENSION pg_plan_advice');
+
+# Verify that a large amount of advice was collected
+my $all_query_count = $node->safe_psql('postgres', <<EOM);
+SELECT COUNT(*) FROM pg_get_collected_shared_advice();
+EOM
+cmp_ok($all_query_count, '>', 40000, "copious advice collected");
+
+# Verify that lots of different advice strings were collected
+my $distinct_query_count = $node->safe_psql('postgres', <<EOM);
+SELECT COUNT(*) FROM
+       (SELECT DISTINCT advice FROM pg_get_collected_shared_advice());
+EOM
+cmp_ok($distinct_query_count, '>', 3000, "diverse advice collected");
+
+# We want to test for the presence of our known tags in the collected advice.
+# Put all tags into the hash that follows; map any tags that aren't tested
+# by the core regression tests to 0, and others to 1.
+my %tag_map = (
+       BITMAP_HEAP_SCAN => 1,
+       FOREIGN_JOIN => 0,
+       GATHER => 1,
+       GATHER_MERGE => 1,
+       HASH_JOIN => 1,
+       INDEX_ONLY_SCAN => 1,
+       INDEX_SCAN => 1,
+       JOIN_ORDER => 1,
+       MERGE_JOIN_MATERIALIZE => 1,
+       MERGE_JOIN_PLAIN => 1,
+       NESTED_LOOP_MATERIALIZE => 1,
+       NESTED_LOOP_MEMOIZE => 1,
+       NESTED_LOOP_PLAIN => 1,
+       NO_GATHER => 1,
+       PARTITIONWISE => 1,
+       SEMIJOIN_NON_UNIQUE => 1,
+       SEMIJOIN_UNIQUE => 1,
+       SEQ_SCAN => 1,
+       TID_SCAN => 1,
+);
+foreach my $tag (sort keys %tag_map)
+{
+       # Count advice strings that use this tag. The tag must start the string
+       # or follow whitespace; a plain LIKE would treat "_" as a wildcard and
+       # would also count every "NO_GATHER(" as a use of "GATHER(".
+       my $tag_count = $node->safe_psql('postgres', <<EOM);
+SELECT COUNT(*) FROM pg_get_collected_shared_advice()
+       WHERE advice ~ '(^|\\s)${tag}\\('
+EOM
+
+       # Check that the tag got a non-trivial amount of use, unless told otherwise.
+       cmp_ok($tag_count, '>', 10, "multiple uses of $tag") if $tag_map{$tag};
+
+       # Regardless, note the exact count in the log, for human consumption.
+       note("found $tag_count advice strings containing $tag");
+}
+
+# Trigger a partial cleanup of the shared advice collector, and then a full
+# cleanup.
+$node->safe_psql('postgres', <<EOM);
+SET pg_plan_advice.shared_collection_limit=500;
+SELECT * FROM pg_clear_collected_shared_advice();
+EOM
+
+done_testing();
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list

index 4ff47115ca8d0a5db78462f8be799c051449ffe3..d1a7e5f8c463442fb29dda79af48988f41edfc32 100644 (file)
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3928,6 +3928,43 @@ pg_wc_probefunc
  pg_wchar
  pg_wchar_tbl
  pgp_armor_headers_state
+pgpa_advice_item
+pgpa_advice_tag_type
+pgpa_advice_target
+pgpa_collected_advice
+pgpa_identifier
+pgpa_index_target
+pgpa_index_type
+pgpa_itm_type
+pgpa_join_class
+pgpa_join_member
+pgpa_join_state
+pgpa_join_strategy
+pgpa_join_unroller
+pgpa_local_advice
+pgpa_local_advice_chunk
+pgpa_output_context
+pgpa_plan_walker_context
+pgpa_planner_state
+pgpa_qf_type
+pgpa_query_feature
+pgpa_ri_checker
+pgpa_ri_checker_key
+pgpa_scan
+pgpa_scan_strategy
+pgpa_shared_advice
+pgpa_shared_advice_chunk
+pgpa_shared_state
+pgpa_target_type
+pgpa_trove
+pgpa_trove_entry
+pgpa_trove_entry_element
+pgpa_trove_entry_hash
+pgpa_trove_entry_key
+pgpa_trove_lookup_type
+pgpa_trove_result
+pgpa_trove_slice
+pgpa_unrolled_join
  pgsocket
  pgsql_thing_t
  pgssEntry
author	Robert Haas <rhaas@postgresql.org>
	Tue, 4 Nov 2025 19:45:31 +0000 (14:45 -0500)
committer	Robert Haas <rhaas@postgresql.org>
	Thu, 6 Nov 2025 16:41:59 +0000 (11:41 -0500)
contrib/Makefile		patch \| blob \| blame \| history
contrib/meson.build		patch \| blob \| blame \| history
contrib/pg_plan_advice/.gitignore	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/Makefile	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/README	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/expected/gather.out	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/expected/join_order.out	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/expected/join_strategy.out	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/expected/local_collector.out	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/expected/partitionwise.out	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/expected/scan.out	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/expected/syntax.out	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/meson.build	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pg_plan_advice--1.0.sql	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pg_plan_advice.c	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pg_plan_advice.control	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pg_plan_advice.h	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_ast.c	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_ast.h	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_collector.c	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_collector.h	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_identifier.c	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_identifier.h	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_join.c	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_join.h	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_output.c	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_output.h	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_parser.y	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_planner.c	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_planner.h	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_scan.c	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_scan.h	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_scanner.l	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_trove.c	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_trove.h	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_walker.c	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/pgpa_walker.h	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/sql/gather.sql	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/sql/join_order.sql	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/sql/join_strategy.sql	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/sql/local_collector.sql	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/sql/partitionwise.sql	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/sql/scan.sql	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/sql/syntax.sql	[new file with mode: 0644]	patch \| blob
contrib/pg_plan_advice/t/001_regress.pl	[new file with mode: 0644]	patch \| blob
src/tools/pgindent/typedefs.list		patch \| blob \| blame \| history