WIP: Add pg_plan_advice contrib module. advice_unstable
authorRobert Haas <rhaas@postgresql.org>
Tue, 4 Nov 2025 19:45:31 +0000 (14:45 -0500)
committerRobert Haas <rhaas@postgresql.org>
Thu, 6 Nov 2025 16:41:59 +0000 (11:41 -0500)
Provide a facility that (1) can be used to stabilize certain plan choices
so that the planner cannot reverse course without authorization and
(2) can be used by knowledgeable users to insist on plan choices contrary
to what the planner believes best. In both cases, terrible outcomes are
possible: users should think twice and perhaps three times before
constraining the planner's ability to do as it thinks best; nevertheless,
there are problems that are much more easily solved with these facilities
than without them.

We take the approach of analyzing a finished plan to produce textual
output, which we call "plan advice", that describes key decisions made
during planning; if that plan advice is provided during future planning
cycles, it will force those key decisions to be made in the same way.
Not all planner decisions can be controlled using advice; for example,
decisions about how to perform aggregation are currently out of scope,
as is choice of sort order. Plan advice can also be edited by the user,
or even written from scratch in simple cases, making it possible to
generate outcomes that the planner would not have produced. Partial
advice can be provided to control some planner outcomes but not others.

Currently, plan advice is focused only on specific outcomes, such as
the choice to use a sequential scan for a particular relation, and not
on estimates that might contribute to those outcomes, such as a
possibly-incorrect selectivity estimate. While it would be useful to
users to be able to provide plan advice that affects selectivity
estimates or other aspects of costing, that is out of scope for this
commit.

For more details, see contrib/pg_plan_advice/README.

NOTE: This code is just a proof of concept. A bunch of things don't
work and a lot of the code needs cleanup. It has no SGML documentation
and not enough test cases, and some of the existing test cases don't
do as we would hope. Known problems are called out by XXX.

46 files changed:
contrib/Makefile
contrib/meson.build
contrib/pg_plan_advice/.gitignore [new file with mode: 0644]
contrib/pg_plan_advice/Makefile [new file with mode: 0644]
contrib/pg_plan_advice/README [new file with mode: 0644]
contrib/pg_plan_advice/expected/gather.out [new file with mode: 0644]
contrib/pg_plan_advice/expected/join_order.out [new file with mode: 0644]
contrib/pg_plan_advice/expected/join_strategy.out [new file with mode: 0644]
contrib/pg_plan_advice/expected/local_collector.out [new file with mode: 0644]
contrib/pg_plan_advice/expected/partitionwise.out [new file with mode: 0644]
contrib/pg_plan_advice/expected/scan.out [new file with mode: 0644]
contrib/pg_plan_advice/expected/syntax.out [new file with mode: 0644]
contrib/pg_plan_advice/meson.build [new file with mode: 0644]
contrib/pg_plan_advice/pg_plan_advice--1.0.sql [new file with mode: 0644]
contrib/pg_plan_advice/pg_plan_advice.c [new file with mode: 0644]
contrib/pg_plan_advice/pg_plan_advice.control [new file with mode: 0644]
contrib/pg_plan_advice/pg_plan_advice.h [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_ast.c [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_ast.h [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_collector.c [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_collector.h [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_identifier.c [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_identifier.h [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_join.c [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_join.h [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_output.c [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_output.h [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_parser.y [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_planner.c [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_planner.h [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_scan.c [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_scan.h [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_scanner.l [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_trove.c [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_trove.h [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_walker.c [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_walker.h [new file with mode: 0644]
contrib/pg_plan_advice/sql/gather.sql [new file with mode: 0644]
contrib/pg_plan_advice/sql/join_order.sql [new file with mode: 0644]
contrib/pg_plan_advice/sql/join_strategy.sql [new file with mode: 0644]
contrib/pg_plan_advice/sql/local_collector.sql [new file with mode: 0644]
contrib/pg_plan_advice/sql/partitionwise.sql [new file with mode: 0644]
contrib/pg_plan_advice/sql/scan.sql [new file with mode: 0644]
contrib/pg_plan_advice/sql/syntax.sql [new file with mode: 0644]
contrib/pg_plan_advice/t/001_regress.pl [new file with mode: 0644]
src/tools/pgindent/typedefs.list

index 2f0a88d3f77448d3f6b9f1dda3de3690be747552..dd04c20acd25b6a577e297a99db4f2e36e0c91fc 100644 (file)
@@ -34,6 +34,7 @@ SUBDIRS = \
                pg_freespacemap \
                pg_logicalinspect \
                pg_overexplain \
+               pg_plan_advice \
                pg_prewarm      \
                pg_stat_statements \
                pg_surgery      \
index ed30ee7d639f6690d2d848a41adbc6e7c18c9cee..cb718dbdac0bc7f69fcf6cce1300417378643af1 100644 (file)
@@ -48,6 +48,7 @@ subdir('pgcrypto')
 subdir('pg_freespacemap')
 subdir('pg_logicalinspect')
 subdir('pg_overexplain')
+subdir('pg_plan_advice')
 subdir('pg_prewarm')
 subdir('pgrowlocks')
 subdir('pg_stat_statements')
diff --git a/contrib/pg_plan_advice/.gitignore b/contrib/pg_plan_advice/.gitignore
new file mode 100644 (file)
index 0000000..19a1425
--- /dev/null
@@ -0,0 +1,3 @@
+/pgpa_parser.h
+/pgpa_parser.c
+/pgpa_scanner.c
diff --git a/contrib/pg_plan_advice/Makefile b/contrib/pg_plan_advice/Makefile
new file mode 100644 (file)
index 0000000..d7e06fc
--- /dev/null
@@ -0,0 +1,46 @@
+# contrib/pg_plan_advice/Makefile
+
+MODULE_big = pg_plan_advice
+OBJS = \
+       $(WIN32RES) \
+       pg_plan_advice.o \
+       pgpa_ast.o \
+       pgpa_collector.o \
+       pgpa_identifier.o \
+       pgpa_join.o \
+       pgpa_output.o \
+       pgpa_parser.o \
+       pgpa_planner.o \
+       pgpa_scan.o \
+       pgpa_scanner.o \
+       pgpa_trove.o \
+       pgpa_walker.o
+
+EXTENSION = pg_plan_advice
+DATA = pg_plan_advice--1.0.sql
+PGFILEDESC = "pg_plan_advice - help the planner get the right plan"
+
+REGRESS = gather join_order join_strategy partitionwise scan
+TAP_TESTS = 1
+
+EXTRA_CLEAN = pgpa_parser.h pgpa_parser.c pgpa_scanner.c
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_plan_advice
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+# See notes in src/backend/parser/Makefile about the following two rules
+pgpa_parser.h: pgpa_parser.c
+       touch $@
+
+pgpa_parser.c: BISONFLAGS += -d
+
+# Force these dependencies to be known even without dependency info built:
+pgpa_parser.o pgpa_scanner.o: pgpa_parser.h
diff --git a/contrib/pg_plan_advice/README b/contrib/pg_plan_advice/README
new file mode 100644 (file)
index 0000000..4590cd0
--- /dev/null
@@ -0,0 +1,275 @@
+contrib/pg_plan_advice/README
+
+Plan Advice
+===========
+
+This module implements a mini-language for "plan advice" that allows for
+control of certain key planner decisions. Goals include (1) enforcing plan
+stability (my previous plan was good and I would like to keep getting a
+similar one) and (2) allowing users to experiment with plans other than
+the one preferred by the optimizer. Non-goals include (1) controlling
+every possible planner decision and (2) forcing consideration of plans
+that the optimizer rejects for reasons other than cost. (There is some
+room for bikeshedding about what exactly this non-goal means: what if
+we skip path generation entirely for a certain case on the theory that
+we know it cannot win on cost? Does that count as a cost-based rejection
+even though no cost was ever computed?)
+
+Generally, plan advice is a series of whitespace-separated advice items,
+each of which applies an advice tag to a list of advice targets. For
+example, "SEQ_SCAN(foo) HASH_JOIN(bar@ss)" contains two items of advice,
+the first of which applies the SEQ_SCAN tag to "foo" and the second of
+which applies the HASH_JOIN tag to "bar@ss". In this simple example, each
+target identifies a single relation; see "Relation Identifiers", below.
+Advice tags can also be applied to groups of relations; for example,
+"HASH_JOIN(baz (bletch quux))" applies the HASH_JOIN tag to the single
+relation identifier "baz" as well as to the 2-item list containing
+"bletch" and "quux".
+
+Critically, this module knows both how to generate plan advice from an
+already-existing plan, and also how to enforce it during future planning
+cycles. Everything it does is intended to be "round-trip safe": if you
+generate advice from a plan and then feed that back into a future planning
+cycle, each piece of advice should be guaranteed to apply to exactly the
+same part of the query from which it was generated without ambiguity or
+guesswork, and it should successfully enforce the same planning decision that
+led to it being generated in the first place. Note that there is no
+intention that these guarantees hold in the presence of intervening DDL;
+e.g. if you change the properties of a function so that a subquery is no
+longer inlined, or if you drop an index named in the plan advice, the advice
+isn't going to work any more. That's expected.
+
+This module aims to force the planner to follow any provided advice without
+regard to whether it appears to be good advice or bad advice.  If the
+user provides bad advice, whether derived from a previously-generated plan
+or manually written, they may get a bad plan. We regard this as user error,
+not a defect in this module. It seems likely that applying advice
+judiciously and only when truly required to avoid problems will be a more
+successful strategy than applying it with a broad brush, but users are free
+to experiment with whatever strategies they think best.
+
+Relation Identifiers
+====================
+
+Uniquely identifying the part of a query to which a certain piece of
+advice applies is harder than it sounds. Our basic approach is to use
+relation aliases as a starting point, and then disambiguate. There are
+three ways that the same relation alias can occur multiple times:
+
+1. It can appear in more than one subquery.
+
+2. It can appear more than once in the same subquery,
+   e.g. (foo JOIN bar) x JOIN foo.
+
+3. The table can be partitioned.
+
+Any combination of these things can occur simultaneously.  Therefore, our
+general syntax for a relation identifier is:
+
+alias_name#occurrence_number/partition_schema.partition_name@plan_name
+
+All components except for the alias_name are optional and included only
+when required. When a component is omitted, the associated punctuation
+must also be omitted. Occurrence numbers are counted ignoring children of
+partitioned tables.  When the generated occurrence number is 1, we omit
+the occurrence number. The partition schema and partition name are included
+only for children of partitioned tables. In generated advice, the
+partition_schema is always included whenever there is a partition_name,
+but user-written advice may mention the name and omit the schema. The
+plan_name is omitted for the top-level PlannerInfo.
+
+Scan Advice
+===========
+
+For many types of scan, no advice is generated or possible; for instance,
+a subquery is always scanned using a subquery scan. While that scan may be
+elided via setrefs processing, this doesn't change the fact that only one
+basic approach exists. Hence, scan advice applies mostly to relations, which
+can be scanned in multiple ways.
+
+We tend to think of a scan as targeting a single relation, and that's
+normally the case, but it doesn't have to be. For instance, if a join is
+proven empty, the whole thing may be replaced with a single Result node
+which, in effect, is a degenerate scan of every relation in the collapsed
+portion of the join tree. Similarly, it's possible to inject a custom scan
+in such a way that it replaces an entire join. If we ever emit advice
+for these cases, it would target sets of relation identifiers surrounded
+by curly braces, e.g. SOME_SORT_OF_SCAN(foo (bar baz)) would mean that the
+given scan type would be used for foo as a single relation and also the
+combination of bar and baz as a join product. We have no such cases at
+present.
+
+For index and index-only scans, both the relation being scanned and the
+index or indexes being used must be specified. For example, INDEX_SCAN(foo
+foo_a_idx bar bar_b_idx) indicates that an index scan (not an index-only
+scan) should be used on foo_a_idx when scanning foo, and that an index scan
+should be used on bar_b_idx when scanning bar.
+
+Bitmap heap scans allow for a more complicated index specification. For
+example, BITMAP_HEAP_SCAN(foo &&(foo_a_idx ||(foo_b_idx foo_c_idx))) says
+that foo should be scanned using a BitmapHeapScan over a BitmapAnd between
+foo_a_idx and the result of a BitmapOr between foo_b_idx and foo_c_idx.
+
+XXX: Currently, BITMAP_HEAP_SCAN does not enforce the index specification,
+because the available hooks are insufficient to do so. It's possible that
+this should be changed to exclude the index specification altogether and
+simply insist that some sort of bitmap heap scan is used; alternatively,
+we need better hooks.
+
+Join Order Advice
+=================
+
+The JOIN_ORDER tag specifies the order in which several tables that are
+part of the same join problem should be joined. Each subquery (except for
+those that are inlined) is a separate join problem. Within a subquery,
+partitionwise joins can create additional, separate join problems. Hence,
+queries involving partitionwise joins may use JOIN_ORDER() many times.
+
+We take the canonical join structure to be an outer-deep tree, so
+JOIN_ORDER(t1 t2 t3) says that t1 is the driving table and should be joined
+first to t2 and then to t3. If the join problem involves additional tables,
+they can be joined in any order after the join between t1, t2, and t3 has
+been constructed. Generated join advice always mentions all tables
+in the join problem, but manually written join advice need not do so.
+
+For trees which are not outer-deep, parentheses can be used. For example,
+JOIN_ORDER(t1 (t2 t3)) says that the top-level join should have t1 on the
+outer side and a join between t2 and t3 on the inner side. That join should
+be constructed so that t2 is on the outer side and t3 is on the inner side.
+
+In some cases, it's not possible to fully specify the join order in this way.
+For example, if t2 and t3 are being scanned by a single custom scan or foreign
+scan, or if a partitionwise join is being performed between those tables, then
+it's impossible to say that t2 is the outer table and t3 is the inner table,
+or the other way around; it's just undefined. In such cases, we generate
+join advice that uses curly braces, intending to indicate a lack of ordering:
+JOIN_ORDER(t1 {t2 t3}) says that the uppermost join should have t1 on the outer
+side and some kind of join between t2 and t3 on the inner side, but without
+saying how that join must be performed or anything about which relation should
+appear on which side of the join, or even whether this kind of join has sides.
+
+Join Strategy Advice
+====================
+
+Tags such as NESTED_LOOP_PLAIN specify the method that should be used to
+perform a certain join. More specifically, NESTED_LOOP_PLAIN(x (y z)) says
+that the plan should put the relation whose identifier is "x" on the inner
+side of a plain nested loop (one without materialization or memoization)
+and that it should also put a join between the relation whose identifier is
+"y" and the relation whose identifier is "z" on the inner side of a nested
+loop. Hence, for an N-table join problem, there will be N-1 pieces of join
+strategy advice; no join strategy advice is required for the outermost
+table in the join problem.
+
+Considering that we have both join order advice and join strategy advice,
+it might seem natural to say that NESTED_LOOP_PLAIN(x) should be redefined
+to mean that x should appear by itself on one side or the other of a nested
+loop, rather than specifically on the inner side, but this definition appears
+useless in practice. It gives the planner too much freedom to do things that
+bear little resemblance to what the user probably had in mind. This makes
+only a limited amount of practical difference in the case of a merge join or
+unparameterized nested loop, but for a parameterized nested loop or a hash
+join, the two sides are treated very differently and saying that a certain
+relation should be involved in one of those operations without saying which
+role it should take isn't saying much.
+
+This choice of definition implies that join strategy advice also imposes some
+join order constraints. For example, given a join between foo and bar,
+HASH_JOIN(bar) implies that foo is the driving table. Otherwise, it would
+be impossible to put bar beneath the inner side of a Hash Join.
+
+Note that, given this definition, it's reasonable to consider deleting the
+join order advice but applying the join strategy advice. For example,
+consider a star schema with tables fact, dim1, dim2, dim3, dim4, and dim5.
+The automatically generated advice might specify JOIN_ORDER(fact dim1 dim3
+dim4 dim2 dim5) HASH_JOIN(dim2 dim4) NESTED_LOOP_PLAIN(dim1 dim3 dim5).
+Deleting the JOIN_ORDER advice allows the planner to reorder the joins
+however it likes while still forcing the same choice of join method. This
+seems potentially useful, and is one reason why a unified syntax that controls
+both join order and join method in a single locution was not chosen.
+
+Advice Completeness
+===================
+
+An essential guiding principle is that no inference may be made on the basis
+of the absence of advice. The user is entitled to remove any portion of the
+generated advice which they deem unsuitable or counterproductive and the
+result should only be to increase the flexibility afforded to the planner.
+This means that if advice can say that a certain optimization or technique
+should be used, it should also be able to say that the optimization or
+technique should not be used. We should never assume that the absence of an
+instruction to do a certain thing means that it should not be done; all
+instructions must be explicit.
+
+Semijoin Uniqueness
+===================
+
+Faced with a semijoin, the planner considers both a direct implementation
+and a plan where one side is made unique and then an inner join is
+performed. We emit SEMIJOIN_UNIQUE() advice when this transformation occurs
+and SEMIJOIN_NON_UNIQUE() advice when it doesn't. These items work like
+join strategy advice: the inner side of the relevant join is named, and the
+chosen join order must be compatible with the advice having some effect.
+
+XXX: Currently, SEMIJOIN_NON_UNIQUE() advice is emitted in some situations
+where the SEMIJOIN_UNIQUE() approach was determined to be non-viable; ideally,
+we should avoid that.
+
+XXX: Right semijoins haven't been properly thought through. The associated
+code probably just doesn't work.
+
+XXX: Semijoin uniqueness advice has no automated tests and needs substantially
+more manual testing.
+
+Partitionwise
+=============
+
+PARTITIONWISE() advice can be used to specify both those partitionwise joins
+which should be performed and those which should not be performed; the idea
+is that each argument to PARTITIONWISE specifies a set of relations that
+should be scanned partitionwise after being joined to each other and nothing
+else. Hence, for example, PARTITIONWISE((t1 t2) t3) specifies that the
+query should contain a partitionwise join between t1 and t2 and that t3
+should not be part of any partitionwise join. If there are no other rels
+in the query, specifying just PARTITIONWISE((t1 t2)) would have the same
+effect, since there would be no other rels to which t3 could be joined in
+a partitionwise fashion.
+
+Parallel Query (Gather, etc.)
+=============================
+
+Each argument to GATHER() or GATHER_MERGE() is a single relation or an
+exact set of relations on top of which a Gather or Gather Merge node,
+respectively, should be placed. Each argument to NO_GATHER() is a single
+relation that should not appear beneath any Gather or Gather Merge node;
+that is, parallelism should not be used.
+
+Implicit Join Order Constraints
+===============================
+
+When JOIN_ORDER() advice is not provided for a particular join problem,
+other pieces of advice may still incidentally constrain the join order.
+For example, a user who specifies HASH_JOIN((foo bar)) is explicitly saying
+that there should be a hash join with exactly foo and bar on the inner
+side of it, but that also implies that foo and bar must be joined to
+each other before either of them is joined to anything else. Otherwise,
+the join the user is attempting to constrain won't actually occur in the
+query, which ends up looking like the system has just decided to ignore
+the advice altogether.
+
+Future Work
+===========
+
+We don't handle choice of aggregation: it would be nice to be able to force
+sorted or grouped aggregation. I'm guessing this can be left to future work.
+
+More seriously, we don't know anything about eager aggregation, which could
+have a large impact on the shape of the plan tree. XXX: This needs some study
+to determine how large a problem it is, and might need to be fixed sooner
+rather than later.
+
+We don't offer any control over estimates, only outcomes. It seems like a
+good idea to incorporate that ability at some future point, as pg_hint_plan
+does. However, since the primary goal of the initial development work is to be
+able to induce the planner to recreate a desired plan that worked well in
+the past, this has not been included in the initial development effort.
diff --git a/contrib/pg_plan_advice/expected/gather.out b/contrib/pg_plan_advice/expected/gather.out
new file mode 100644 (file)
index 0000000..d0224a2
--- /dev/null
@@ -0,0 +1,320 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 1;
+SET parallel_setup_cost = 0;
+SET parallel_tuple_cost = 0;
+SET min_parallel_table_scan_size = 0;
+SET debug_parallel_query = off;
+CREATE TABLE gt_dim (id serial primary key, dim text)
+       WITH (autovacuum_enabled = false);
+INSERT INTO gt_dim (dim) SELECT random()::text FROM generate_series(1,100) g;
+VACUUM ANALYZE gt_dim;
+CREATE TABLE gt_fact (
+       id int not null,
+       dim_id integer not null references gt_dim (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO gt_fact
+       SELECT g, (g%3)+1 FROM generate_series(1,100000) g;
+VACUUM ANALYZE gt_fact;
+-- By default, we expect Gather Merge with a parallel hash join.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Gather Merge
+   Workers Planned: 1
+   ->  Sort
+         Sort Key: f.dim_id
+         ->  Parallel Hash Join
+               Hash Cond: (f.dim_id = d.id)
+               ->  Parallel Seq Scan on gt_fact f
+               ->  Parallel Hash
+                     ->  Parallel Seq Scan on gt_dim d
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   HASH_JOIN(d)
+   SEQ_SCAN(f d)
+   GATHER_MERGE((f d))
+(14 rows)
+
+-- Force Gather or Gather Merge of both relations together.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Gather Merge
+   Workers Planned: 1
+   ->  Sort
+         Sort Key: f.dim_id
+         ->  Parallel Hash Join
+               Hash Cond: (f.dim_id = d.id)
+               ->  Parallel Seq Scan on gt_fact f
+               ->  Parallel Hash
+                     ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER_MERGE((f d)) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   HASH_JOIN(d)
+   SEQ_SCAN(f d)
+   GATHER_MERGE((f d))
+(16 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Sort
+   Sort Key: f.dim_id
+   ->  Gather
+         Workers Planned: 1
+         ->  Parallel Hash Join
+               Hash Cond: (f.dim_id = d.id)
+               ->  Parallel Seq Scan on gt_fact f
+               ->  Parallel Hash
+                     ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER((f d)) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   HASH_JOIN(d)
+   SEQ_SCAN(f d)
+   GATHER((f d))
+(16 rows)
+
+COMMIT;
+-- Force a separate Gather or Gather Merge operation for each relation.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Merge Join
+   Merge Cond: (f.dim_id = d.id)
+   ->  Gather Merge
+         Workers Planned: 1
+         ->  Sort
+               Sort Key: f.dim_id
+               ->  Parallel Seq Scan on gt_fact f
+   ->  Gather Merge
+         Workers Planned: 1
+         ->  Sort
+               Sort Key: d.id
+               ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER_MERGE(f) /* matched */
+   GATHER_MERGE(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   MERGE_JOIN_PLAIN(d)
+   SEQ_SCAN(f d)
+   GATHER_MERGE(f d)
+(20 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Merge Join
+   Merge Cond: (f.dim_id = d.id)
+   ->  Sort
+         Sort Key: f.dim_id
+         ->  Gather
+               Workers Planned: 1
+               ->  Parallel Seq Scan on gt_fact f
+   ->  Sort
+         Sort Key: d.id
+         ->  Gather
+               Workers Planned: 1
+               ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER(f) /* matched */
+   GATHER(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   MERGE_JOIN_PLAIN(d)
+   SEQ_SCAN(f d)
+   GATHER(f d)
+(20 rows)
+
+COMMIT;
+-- Force a Gather or Gather Merge on one relation but no parallelism on other.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(f) no_gather(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Merge Join
+   Merge Cond: (f.dim_id = d.id)
+   ->  Gather Merge
+         Workers Planned: 1
+         ->  Sort
+               Sort Key: f.dim_id
+               ->  Parallel Seq Scan on gt_fact f
+   ->  Index Scan using gt_dim_pkey on gt_dim d
+ Supplied Plan Advice:
+   GATHER_MERGE(f) /* matched */
+   NO_GATHER(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   MERGE_JOIN_PLAIN(d)
+   SEQ_SCAN(f)
+   INDEX_SCAN(d public.gt_dim_pkey)
+   GATHER_MERGE(f)
+   NO_GATHER(d)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather_merge(d) no_gather(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Merge Join
+   Merge Cond: (f.dim_id = d.id)
+   ->  Sort
+         Sort Key: f.dim_id
+         ->  Seq Scan on gt_fact f
+   ->  Gather Merge
+         Workers Planned: 1
+         ->  Sort
+               Sort Key: d.id
+               ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER_MERGE(d) /* matched */
+   NO_GATHER(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   MERGE_JOIN_PLAIN(d)
+   SEQ_SCAN(f d)
+   GATHER_MERGE(d)
+   NO_GATHER(f)
+(19 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather(f) no_gather(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Merge Join
+   Merge Cond: (d.id = f.dim_id)
+   ->  Index Scan using gt_dim_pkey on gt_dim d
+   ->  Sort
+         Sort Key: f.dim_id
+         ->  Gather
+               Workers Planned: 1
+               ->  Parallel Seq Scan on gt_fact f
+ Supplied Plan Advice:
+   GATHER(f) /* matched */
+   NO_GATHER(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   MERGE_JOIN_PLAIN(f)
+   SEQ_SCAN(f)
+   INDEX_SCAN(d public.gt_dim_pkey)
+   GATHER(f)
+   NO_GATHER(d)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather(d) no_gather(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Merge Join
+   Merge Cond: (f.dim_id = d.id)
+   ->  Sort
+         Sort Key: f.dim_id
+         ->  Seq Scan on gt_fact f
+   ->  Sort
+         Sort Key: d.id
+         ->  Gather
+               Workers Planned: 1
+               ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER(d) /* matched */
+   NO_GATHER(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   MERGE_JOIN_PLAIN(d)
+   SEQ_SCAN(f d)
+   GATHER(d)
+   NO_GATHER(f)
+(19 rows)
+
+COMMIT;
+-- Force no Gather or Gather Merge use at all.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'no_gather(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+                   QUERY PLAN                   
+------------------------------------------------
+ Merge Join
+   Merge Cond: (d.id = f.dim_id)
+   ->  Index Scan using gt_dim_pkey on gt_dim d
+   ->  Sort
+         Sort Key: f.dim_id
+         ->  Seq Scan on gt_fact f
+ Supplied Plan Advice:
+   NO_GATHER(f) /* matched */
+   NO_GATHER(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   MERGE_JOIN_PLAIN(f)
+   SEQ_SCAN(f)
+   INDEX_SCAN(d public.gt_dim_pkey)
+   NO_GATHER(f d)
+(15 rows)
+
+COMMIT;
+-- Can't force Gather Merge without the ORDER BY clause, but just Gather is OK.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Gather
+   Disabled: true
+   Workers Planned: 1
+   ->  Parallel Hash Join
+         Hash Cond: (f.dim_id = d.id)
+         ->  Parallel Seq Scan on gt_fact f
+         ->  Parallel Hash
+               ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER_MERGE((f d)) /* matched, failed */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   HASH_JOIN(d)
+   SEQ_SCAN(f d)
+   GATHER((f d))
+(15 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Gather
+   Workers Planned: 1
+   ->  Parallel Hash Join
+         Hash Cond: (f.dim_id = d.id)
+         ->  Parallel Seq Scan on gt_fact f
+         ->  Parallel Hash
+               ->  Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+   GATHER((f d)) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   HASH_JOIN(d)
+   SEQ_SCAN(f d)
+   GATHER((f d))
+(14 rows)
+
+COMMIT;
diff --git a/contrib/pg_plan_advice/expected/join_order.out b/contrib/pg_plan_advice/expected/join_order.out
new file mode 100644 (file)
index 0000000..e876523
--- /dev/null
@@ -0,0 +1,292 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+CREATE TABLE jo_dim1 (id integer primary key, dim1 text, val1 int)
+       WITH (autovacuum_enabled = false);
+INSERT INTO jo_dim1 (id, dim1, val1)
+       SELECT g, 'some filler text ' || g, (g % 3) + 1
+         FROM generate_series(1,100) g;
+VACUUM ANALYZE jo_dim1;
+CREATE TABLE jo_dim2 (id integer primary key, dim2 text, val2 int)
+       WITH (autovacuum_enabled = false);
+INSERT INTO jo_dim2 (id, dim2, val2)
+       SELECT g, 'some filler text ' || g, (g % 7) + 1
+         FROM generate_series(1,1000) g;
+VACUUM ANALYZE jo_dim2;
+CREATE TABLE jo_fact (
+       id int primary key,
+       dim1_id integer not null references jo_dim1 (id),
+       dim2_id integer not null references jo_dim2 (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO jo_fact
+       SELECT g, (g%100)+1, (g%100)+1 FROM generate_series(1,100000) g;
+VACUUM ANALYZE jo_fact;
+-- We expect to join to d2 first and then d1, since the condition on d2
+-- is more selective.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+                QUERY PLAN                
+------------------------------------------
+ Hash Join
+   Hash Cond: (f.dim1_id = d1.id)
+   ->  Hash Join
+         Hash Cond: (f.dim2_id = d2.id)
+         ->  Seq Scan on jo_fact f
+         ->  Hash
+               ->  Seq Scan on jo_dim2 d2
+                     Filter: (val2 = 1)
+   ->  Hash
+         ->  Seq Scan on jo_dim1 d1
+               Filter: (val1 = 1)
+ Generated Plan Advice:
+   JOIN_ORDER(f d2 d1)
+   HASH_JOIN(d2 d1)
+   SEQ_SCAN(f d2 d1)
+   NO_GATHER(f d1 d2)
+(16 rows)
+
+-- Force a few different join orders. Some of these are very inefficient,
+-- but the planner considers them all viable.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+                QUERY PLAN                
+------------------------------------------
+ Hash Join
+   Hash Cond: (f.dim2_id = d2.id)
+   ->  Hash Join
+         Hash Cond: (f.dim1_id = d1.id)
+         ->  Seq Scan on jo_fact f
+         ->  Hash
+               ->  Seq Scan on jo_dim1 d1
+                     Filter: (val1 = 1)
+   ->  Hash
+         ->  Seq Scan on jo_dim2 d2
+               Filter: (val2 = 1)
+ Supplied Plan Advice:
+   JOIN_ORDER(f d1 d2) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d1 d2)
+   HASH_JOIN(d1 d2)
+   SEQ_SCAN(f d1 d2)
+   NO_GATHER(f d1 d2)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+                QUERY PLAN                
+------------------------------------------
+ Hash Join
+   Hash Cond: (f.dim1_id = d1.id)
+   ->  Hash Join
+         Hash Cond: (f.dim2_id = d2.id)
+         ->  Seq Scan on jo_fact f
+         ->  Hash
+               ->  Seq Scan on jo_dim2 d2
+                     Filter: (val2 = 1)
+   ->  Hash
+         ->  Seq Scan on jo_dim1 d1
+               Filter: (val1 = 1)
+ Supplied Plan Advice:
+   JOIN_ORDER(f d2 d1) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d2 d1)
+   HASH_JOIN(d2 d1)
+   SEQ_SCAN(f d2 d1)
+   NO_GATHER(f d1 d2)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(d1 f d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+               QUERY PLAN                
+-----------------------------------------
+ Hash Join
+   Hash Cond: (f.dim2_id = d2.id)
+   ->  Hash Join
+         Hash Cond: (d1.id = f.dim1_id)
+         ->  Seq Scan on jo_dim1 d1
+               Filter: (val1 = 1)
+         ->  Hash
+               ->  Seq Scan on jo_fact f
+   ->  Hash
+         ->  Seq Scan on jo_dim2 d2
+               Filter: (val2 = 1)
+ Supplied Plan Advice:
+   JOIN_ORDER(d1 f d2) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d1 f d2)
+   HASH_JOIN(f d2)
+   SEQ_SCAN(d1 f d2)
+   NO_GATHER(f d1 d2)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(f (d1 d2))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((f.dim1_id = d1.id) AND (f.dim2_id = d2.id))
+   ->  Seq Scan on jo_fact f
+   ->  Hash
+         ->  Nested Loop
+               ->  Seq Scan on jo_dim1 d1
+                     Filter: (val1 = 1)
+               ->  Materialize
+                     ->  Seq Scan on jo_dim2 d2
+                           Filter: (val2 = 1)
+ Supplied Plan Advice:
+   JOIN_ORDER(f (d1 d2)) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f (d1 d2))
+   NESTED_LOOP_MATERIALIZE(d2)
+   HASH_JOIN(d2)
+   SEQ_SCAN(f d1 d2)
+   NO_GATHER(f d1 d2)
+(18 rows)
+
+COMMIT;
+-- The unusual formulation of this query is intended to prevent the query
+-- planner from reducing the FULL JOIN to some other join type, so that we
+-- can test what happens with a join type that cannot be reordered.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Nested Loop
+   Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
+   ->  Merge Full Join
+         Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0)))
+         ->  Sort
+               Sort Key: ((d2.id + 0))
+               ->  Seq Scan on jo_dim2 d2
+         ->  Sort
+               Sort Key: ((f.dim2_id + 0))
+               ->  Seq Scan on jo_fact f
+   ->  Materialize
+         ->  Seq Scan on jo_dim1 d1
+ Generated Plan Advice:
+   JOIN_ORDER(d2 f d1)
+   MERGE_JOIN_PLAIN(f)
+   NESTED_LOOP_MATERIALIZE(d1)
+   SEQ_SCAN(d2 f d1)
+   NO_GATHER(d1 f d2)
+(18 rows)
+
+-- We should not be able to force the planner to join f to d1 first, because
+-- that is not a valid join order, but we should be able to force the planner
+-- to make either d2 or f the driving table.
+BEGIN;
+-- XXX: The advice feedback says 'partially matched' here which isn't exactly
+-- wrong given the way that flag is handled in the code, but it's at the very
+-- least confusing. Something should probably be improved here.
+SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Nested Loop
+   Disabled: true
+   Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
+   ->  Merge Full Join
+         Disabled: true
+         Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0)))
+         ->  Sort
+               Sort Key: ((d2.id + 0))
+               ->  Seq Scan on jo_dim2 d2
+         ->  Sort
+               Sort Key: ((f.dim2_id + 0))
+               ->  Seq Scan on jo_fact f
+   ->  Seq Scan on jo_dim1 d1
+ Supplied Plan Advice:
+   JOIN_ORDER(f d1 d2) /* partially matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d2 f d1)
+   MERGE_JOIN_PLAIN(f)
+   NESTED_LOOP_PLAIN(d1)
+   SEQ_SCAN(d2 f d1)
+   NO_GATHER(d1 f d2)
+(21 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Nested Loop
+   Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
+   ->  Merge Full Join
+         Merge Cond: (((f.dim2_id + 0)) = ((d2.id + 0)))
+         ->  Sort
+               Sort Key: ((f.dim2_id + 0))
+               ->  Seq Scan on jo_fact f
+         ->  Sort
+               Sort Key: ((d2.id + 0))
+               ->  Seq Scan on jo_dim2 d2
+   ->  Materialize
+         ->  Seq Scan on jo_dim1 d1
+ Supplied Plan Advice:
+   JOIN_ORDER(f d2 d1) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d2 d1)
+   MERGE_JOIN_PLAIN(d2)
+   NESTED_LOOP_MATERIALIZE(d1)
+   SEQ_SCAN(f d2 d1)
+   NO_GATHER(d1 f d2)
+(20 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(d2 f d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Nested Loop
+   Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
+   ->  Merge Full Join
+         Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0)))
+         ->  Sort
+               Sort Key: ((d2.id + 0))
+               ->  Seq Scan on jo_dim2 d2
+         ->  Sort
+               Sort Key: ((f.dim2_id + 0))
+               ->  Seq Scan on jo_fact f
+   ->  Materialize
+         ->  Seq Scan on jo_dim1 d1
+ Supplied Plan Advice:
+   JOIN_ORDER(d2 f d1) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d2 f d1)
+   MERGE_JOIN_PLAIN(f)
+   NESTED_LOOP_MATERIALIZE(d1)
+   SEQ_SCAN(d2 f d1)
+   NO_GATHER(d1 f d2)
+(20 rows)
+
+COMMIT;
+-- XXX: add tests for join order prefix matching
+-- XXX: join_order(justonerel) shouldn't report partially matched
diff --git a/contrib/pg_plan_advice/expected/join_strategy.out b/contrib/pg_plan_advice/expected/join_strategy.out
new file mode 100644 (file)
index 0000000..71ee26a
--- /dev/null
@@ -0,0 +1,297 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+CREATE TABLE join_dim (id serial primary key, dim text)
+       WITH (autovacuum_enabled = false);
+INSERT INTO join_dim (dim) SELECT random()::text FROM generate_series(1,100) g;
+VACUUM ANALYZE join_dim;
+CREATE TABLE join_fact (
+       id int primary key,
+       dim_id integer not null references join_dim (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO join_fact
+       SELECT g, (g%3)+1 FROM generate_series(1,100000) g;
+CREATE INDEX join_fact_dim_id ON join_fact (dim_id);
+VACUUM ANALYZE join_fact;
+-- We expect a hash join by default.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+             QUERY PLAN             
+------------------------------------
+ Hash Join
+   Hash Cond: (f.dim_id = d.id)
+   ->  Seq Scan on join_fact f
+   ->  Hash
+         ->  Seq Scan on join_dim d
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   HASH_JOIN(d)
+   SEQ_SCAN(f d)
+   NO_GATHER(f d)
+(10 rows)
+
+-- Try forcing each join method in turn with join_dim as the inner table.
+-- All of these should work except for MERGE_JOIN_MATERIALIZE; that will
+-- fail, because the planner knows that join_dim (id) is unique, and will
+-- refuse to add mark/restore overhead.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+             QUERY PLAN             
+------------------------------------
+ Hash Join
+   Hash Cond: (f.dim_id = d.id)
+   ->  Seq Scan on join_fact f
+   ->  Hash
+         ->  Seq Scan on join_dim d
+ Supplied Plan Advice:
+   HASH_JOIN(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   HASH_JOIN(d)
+   SEQ_SCAN(f d)
+   NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Merge Join
+   Disabled: true
+   Merge Cond: (f.dim_id = d.id)
+   ->  Index Scan using join_fact_dim_id on join_fact f
+   ->  Index Scan using join_dim_pkey on join_dim d
+ Supplied Plan Advice:
+   MERGE_JOIN_MATERIALIZE(d) /* matched, failed */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   MERGE_JOIN_PLAIN(d)
+   INDEX_SCAN(f public.join_fact_dim_id d public.join_dim_pkey)
+   NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Merge Join
+   Merge Cond: (f.dim_id = d.id)
+   ->  Index Scan using join_fact_dim_id on join_fact f
+   ->  Index Scan using join_dim_pkey on join_dim d
+ Supplied Plan Advice:
+   MERGE_JOIN_PLAIN(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   MERGE_JOIN_PLAIN(d)
+   INDEX_SCAN(f public.join_fact_dim_id d public.join_dim_pkey)
+   NO_GATHER(f d)
+(11 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                 QUERY PLAN                 
+--------------------------------------------
+ Nested Loop
+   Join Filter: (f.dim_id = d.id)
+   ->  Seq Scan on join_fact f
+   ->  Materialize
+         ->  Seq Scan on join_dim d
+ Supplied Plan Advice:
+   NESTED_LOOP_MATERIALIZE(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   NESTED_LOOP_MATERIALIZE(d)
+   SEQ_SCAN(f d)
+   NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on join_fact f
+   ->  Memoize
+         Cache Key: f.dim_id
+         Cache Mode: logical
+         ->  Index Scan using join_dim_pkey on join_dim d
+               Index Cond: (id = f.dim_id)
+ Supplied Plan Advice:
+   NESTED_LOOP_MEMOIZE(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   NESTED_LOOP_MEMOIZE(d)
+   SEQ_SCAN(f)
+   INDEX_SCAN(d public.join_dim_pkey)
+   NO_GATHER(f d)
+(15 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on join_fact f
+   ->  Index Scan using join_dim_pkey on join_dim d
+         Index Cond: (id = f.dim_id)
+ Supplied Plan Advice:
+   NESTED_LOOP_PLAIN(d) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   NESTED_LOOP_PLAIN(d)
+   SEQ_SCAN(f)
+   INDEX_SCAN(d public.join_dim_pkey)
+   NO_GATHER(f d)
+(12 rows)
+
+COMMIT;
+-- Now try forcing each join method in turn with join_fact as the inner
+-- table. All of these should work.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+             QUERY PLAN              
+-------------------------------------
+ Hash Join
+   Hash Cond: (d.id = f.dim_id)
+   ->  Seq Scan on join_dim d
+   ->  Hash
+         ->  Seq Scan on join_fact f
+ Supplied Plan Advice:
+   HASH_JOIN(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   HASH_JOIN(f)
+   SEQ_SCAN(d f)
+   NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Merge Join
+   Merge Cond: (d.id = f.dim_id)
+   ->  Index Scan using join_dim_pkey on join_dim d
+   ->  Materialize
+         ->  Index Scan using join_fact_dim_id on join_fact f
+ Supplied Plan Advice:
+   MERGE_JOIN_MATERIALIZE(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   MERGE_JOIN_MATERIALIZE(f)
+   INDEX_SCAN(d public.join_dim_pkey f public.join_fact_dim_id)
+   NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Merge Join
+   Merge Cond: (d.id = f.dim_id)
+   ->  Index Scan using join_dim_pkey on join_dim d
+   ->  Index Scan using join_fact_dim_id on join_fact f
+ Supplied Plan Advice:
+   MERGE_JOIN_PLAIN(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   MERGE_JOIN_PLAIN(f)
+   INDEX_SCAN(d public.join_dim_pkey f public.join_fact_dim_id)
+   NO_GATHER(f d)
+(11 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                 QUERY PLAN                 
+--------------------------------------------
+ Nested Loop
+   Join Filter: (f.dim_id = d.id)
+   ->  Seq Scan on join_dim d
+   ->  Materialize
+         ->  Seq Scan on join_fact f
+ Supplied Plan Advice:
+   NESTED_LOOP_MATERIALIZE(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   NESTED_LOOP_MATERIALIZE(f)
+   SEQ_SCAN(d f)
+   NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on join_dim d
+   ->  Memoize
+         Cache Key: d.id
+         Cache Mode: logical
+         ->  Index Scan using join_fact_dim_id on join_fact f
+               Index Cond: (dim_id = d.id)
+ Supplied Plan Advice:
+   NESTED_LOOP_MEMOIZE(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   NESTED_LOOP_MEMOIZE(f)
+   SEQ_SCAN(d)
+   INDEX_SCAN(f public.join_fact_dim_id)
+   NO_GATHER(f d)
+(15 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on join_dim d
+   ->  Index Scan using join_fact_dim_id on join_fact f
+         Index Cond: (dim_id = d.id)
+ Supplied Plan Advice:
+   NESTED_LOOP_PLAIN(f) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(d f)
+   NESTED_LOOP_PLAIN(f)
+   SEQ_SCAN(d)
+   INDEX_SCAN(f public.join_fact_dim_id)
+   NO_GATHER(f d)
+(12 rows)
+
+COMMIT;
+-- We can't force a foreign join between these tables, because they
+-- aren't foreign tables.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'FOREIGN_JOIN((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Nested Loop
+   Disabled: true
+   ->  Seq Scan on join_fact f
+   ->  Index Scan using join_dim_pkey on join_dim d
+         Index Cond: (id = f.dim_id)
+ Supplied Plan Advice:
+   FOREIGN_JOIN((f d)) /* matched, failed */
+ Generated Plan Advice:
+   JOIN_ORDER(f d)
+   NESTED_LOOP_PLAIN(d)
+   SEQ_SCAN(f)
+   INDEX_SCAN(d public.join_dim_pkey)
+   NO_GATHER(f d)
+(13 rows)
+
+COMMIT;
diff --git a/contrib/pg_plan_advice/expected/local_collector.out b/contrib/pg_plan_advice/expected/local_collector.out
new file mode 100644 (file)
index 0000000..ac5aecd
--- /dev/null
@@ -0,0 +1,64 @@
+CREATE EXTENSION pg_plan_advice;
+-- Try clearing advice before we've collected any.
+SELECT pg_clear_collected_local_advice();
+ pg_clear_collected_local_advice 
+---------------------------------
+(1 row)
+
+-- Set a small advice collection limit so that we'll exceed it.
+SET pg_plan_advice.local_collection_limit = 2;
+-- Set up a dummy table.
+CREATE TABLE dummy_table (a int primary key, b text)
+       WITH (autovacuum_enabled = false, parallel_workers = 0);
+-- Test queries.
+SELECT * FROM dummy_table a, dummy_table b;
+ a | b | a | b 
+---+---+---+---
+(0 rows)
+
+SELECT * FROM dummy_table;
+ a | b 
+---+---
+(0 rows)
+
+-- Should return the advice from the second test query.
+SELECT advice FROM pg_get_collected_local_advice() ORDER BY id LIMIT 1;
+         advice         
+------------------------
+ SEQ_SCAN(dummy_table) +
+ NO_GATHER(dummy_table)
+(1 row)
+
+-- Now try clearing advice again.
+SELECT pg_clear_collected_local_advice();
+ pg_clear_collected_local_advice 
+---------------------------------
+(1 row)
+
+-- Raise the collection limit so that the collector uses multiple chunks.
+SET pg_plan_advice.local_collection_limit = 2000;
+-- Push a bunch of queries through the collector.
+DO $$
+BEGIN
+       FOR x IN 1..2000 LOOP
+               EXECUTE 'SELECT * FROM dummy_table';
+       END LOOP;
+END
+$$;
+-- Check that the collector worked.
+SELECT COUNT(*) FROM pg_get_collected_local_advice();
+ count 
+-------
+  2000
+(1 row)
+
+-- And clear one more time, to verify that this doesn't cause a problem
+-- even with a larger number of entries.
+SELECT pg_clear_collected_local_advice();
+ pg_clear_collected_local_advice 
+---------------------------------
+(1 row)
+
diff --git a/contrib/pg_plan_advice/expected/partitionwise.out b/contrib/pg_plan_advice/expected/partitionwise.out
new file mode 100644 (file)
index 0000000..df0f055
--- /dev/null
@@ -0,0 +1,243 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+SET enable_partitionwise_join = true;
+CREATE TABLE pt1 (id integer primary key, dim1 text, val1 int)
+       PARTITION BY RANGE (id);
+CREATE TABLE pt1a PARTITION OF pt1 FOR VALUES FROM (1) to (1001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt1b PARTITION OF pt1 FOR VALUES FROM (1001) to (2001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt1c PARTITION OF pt1 FOR VALUES FROM (2001) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO pt1 (id, dim1, val1)
+       SELECT g, 'some filler text ' || g, (g % 3) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE pt1;
+CREATE TABLE pt2 (id integer primary key, dim2 text, val2 int)
+       PARTITION BY RANGE (id);
+CREATE TABLE pt2a PARTITION OF pt2 FOR VALUES FROM (1) to (1001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt2b PARTITION OF pt2 FOR VALUES FROM (1001) to (2001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt2c PARTITION OF pt2 FOR VALUES FROM (2001) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO pt2 (id, dim2, val2)
+       SELECT g, 'some other text ' || g, (g % 5) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE pt2;
+CREATE TABLE pt3 (id integer primary key, dim3 text, val3 int)
+       PARTITION BY RANGE (id);
+CREATE TABLE pt3a PARTITION OF pt3 FOR VALUES FROM (1) to (1001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt3b PARTITION OF pt3 FOR VALUES FROM (1001) to (2001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt3c PARTITION OF pt3 FOR VALUES FROM (2001) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO pt3 (id, dim3, val3)
+       SELECT g, 'a third random text ' || g, (g % 7) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE pt3;
+CREATE TABLE ptmismatch (id integer primary key, dimm text, valm int)
+       PARTITION BY RANGE (id);
+CREATE TABLE ptmismatcha PARTITION OF ptmismatch
+    FOR VALUES FROM (1) to (1501)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE ptmismatchb PARTITION OF ptmismatch
+    FOR VALUES FROM (1501) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO ptmismatch (id, dimm, valm)
+       SELECT g, 'yet another text ' || g, (g % 2) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE ptmismatch;
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+   AND val1 = 1 AND val2 = 1 AND val3 = 1;
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
+ Append
+   ->  Nested Loop
+         ->  Hash Join
+               Hash Cond: (pt2_1.id = pt3_1.id)
+               ->  Seq Scan on pt2a pt2_1
+                     Filter: (val2 = 1)
+               ->  Hash
+                     ->  Seq Scan on pt3a pt3_1
+                           Filter: (val3 = 1)
+         ->  Index Scan using pt1a_pkey on pt1a pt1_1
+               Index Cond: (id = pt2_1.id)
+               Filter: (val1 = 1)
+   ->  Nested Loop
+         ->  Hash Join
+               Hash Cond: (pt2_2.id = pt3_2.id)
+               ->  Seq Scan on pt2b pt2_2
+                     Filter: (val2 = 1)
+               ->  Hash
+                     ->  Seq Scan on pt3b pt3_2
+                           Filter: (val3 = 1)
+         ->  Index Scan using pt1b_pkey on pt1b pt1_2
+               Index Cond: (id = pt2_2.id)
+               Filter: (val1 = 1)
+   ->  Nested Loop
+         ->  Hash Join
+               Hash Cond: (pt2_3.id = pt3_3.id)
+               ->  Seq Scan on pt2c pt2_3
+                     Filter: (val2 = 1)
+               ->  Hash
+                     ->  Seq Scan on pt3c pt3_3
+                           Filter: (val3 = 1)
+         ->  Index Scan using pt1c_pkey on pt1c pt1_3
+               Index Cond: (id = pt2_3.id)
+               Filter: (val1 = 1)
+ Generated Plan Advice:
+   JOIN_ORDER(pt2/public.pt2a pt3/public.pt3a pt1/public.pt1a)
+   JOIN_ORDER(pt2/public.pt2b pt3/public.pt3b pt1/public.pt1b)
+   JOIN_ORDER(pt2/public.pt2c pt3/public.pt3c pt1/public.pt1c)
+   NESTED_LOOP_PLAIN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c)
+   HASH_JOIN(pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+   SEQ_SCAN(pt2/public.pt2a pt3/public.pt3a pt2/public.pt2b pt3/public.pt3b
+    pt2/public.pt2c pt3/public.pt3c)
+   INDEX_SCAN(pt1/public.pt1a public.pt1a_pkey pt1/public.pt1b public.pt1b_pkey
+    pt1/public.pt1c public.pt1c_pkey)
+   PARTITIONWISE((pt1 pt2 pt3))
+   NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a
+    pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+(47 rows)
+
+-- Suppress partitionwise join, or do it just partially.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE(pt1 pt2 pt3)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+   AND val1 = 1 AND val2 = 1 AND val3 = 1;
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (pt1.id = pt2.id)
+   ->  Append
+         ->  Seq Scan on pt1a pt1_1
+               Filter: (val1 = 1)
+         ->  Seq Scan on pt1b pt1_2
+               Filter: (val1 = 1)
+         ->  Seq Scan on pt1c pt1_3
+               Filter: (val1 = 1)
+   ->  Hash
+         ->  Hash Join
+               Hash Cond: (pt2.id = pt3.id)
+               ->  Append
+                     ->  Seq Scan on pt2a pt2_1
+                           Filter: (val2 = 1)
+                     ->  Seq Scan on pt2b pt2_2
+                           Filter: (val2 = 1)
+                     ->  Seq Scan on pt2c pt2_3
+                           Filter: (val2 = 1)
+               ->  Hash
+                     ->  Append
+                           ->  Seq Scan on pt3a pt3_1
+                                 Filter: (val3 = 1)
+                           ->  Seq Scan on pt3b pt3_2
+                                 Filter: (val3 = 1)
+                           ->  Seq Scan on pt3c pt3_3
+                                 Filter: (val3 = 1)
+ Supplied Plan Advice:
+   PARTITIONWISE(pt1) /* matched */
+   PARTITIONWISE(pt2) /* matched */
+   PARTITIONWISE(pt3) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(pt1 (pt2 pt3))
+   HASH_JOIN(pt3 pt3)
+   SEQ_SCAN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a
+    pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b
+    pt3/public.pt3c)
+   PARTITIONWISE(pt1 pt2 pt3)
+   NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a
+    pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+(40 rows)
+
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 pt2) pt3)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+   AND val1 = 1 AND val2 = 1 AND val3 = 1;
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (pt1.id = pt3.id)
+   ->  Append
+         ->  Hash Join
+               Hash Cond: (pt1_1.id = pt2_1.id)
+               ->  Seq Scan on pt1a pt1_1
+                     Filter: (val1 = 1)
+               ->  Hash
+                     ->  Seq Scan on pt2a pt2_1
+                           Filter: (val2 = 1)
+         ->  Hash Join
+               Hash Cond: (pt1_2.id = pt2_2.id)
+               ->  Seq Scan on pt1b pt1_2
+                     Filter: (val1 = 1)
+               ->  Hash
+                     ->  Seq Scan on pt2b pt2_2
+                           Filter: (val2 = 1)
+         ->  Hash Join
+               Hash Cond: (pt1_3.id = pt2_3.id)
+               ->  Seq Scan on pt1c pt1_3
+                     Filter: (val1 = 1)
+               ->  Hash
+                     ->  Seq Scan on pt2c pt2_3
+                           Filter: (val2 = 1)
+   ->  Hash
+         ->  Append
+               ->  Seq Scan on pt3a pt3_1
+                     Filter: (val3 = 1)
+               ->  Seq Scan on pt3b pt3_2
+                     Filter: (val3 = 1)
+               ->  Seq Scan on pt3c pt3_3
+                     Filter: (val3 = 1)
+ Supplied Plan Advice:
+   PARTITIONWISE((pt1 pt2)) /* matched */
+   PARTITIONWISE(pt3) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(pt1/public.pt1a pt2/public.pt2a)
+   JOIN_ORDER(pt1/public.pt1b pt2/public.pt2b)
+   JOIN_ORDER(pt1/public.pt1c pt2/public.pt2c)
+   JOIN_ORDER({pt1 pt2} pt3)
+   HASH_JOIN(pt2/public.pt2a pt2/public.pt2b pt2/public.pt2c pt3)
+   SEQ_SCAN(pt1/public.pt1a pt2/public.pt2a pt1/public.pt1b pt2/public.pt2b
+    pt1/public.pt1c pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b
+    pt3/public.pt3c)
+   PARTITIONWISE((pt1 pt2) pt3)
+   NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a
+    pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+(47 rows)
+
+COMMIT;
+-- Can't force a partitionwise join with a mismatched table.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 ptmismatch))';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, ptmismatch WHERE pt1.id = ptmismatch.id;
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Nested Loop
+   Disabled: true
+   ->  Append
+         ->  Seq Scan on pt1a pt1_1
+         ->  Seq Scan on pt1b pt1_2
+         ->  Seq Scan on pt1c pt1_3
+   ->  Append
+         ->  Index Scan using ptmismatcha_pkey on ptmismatcha ptmismatch_1
+               Index Cond: (id = pt1.id)
+         ->  Index Scan using ptmismatchb_pkey on ptmismatchb ptmismatch_2
+               Index Cond: (id = pt1.id)
+ Supplied Plan Advice:
+   PARTITIONWISE((pt1 ptmismatch)) /* matched, failed */
+ Generated Plan Advice:
+   JOIN_ORDER(pt1 ptmismatch)
+   NESTED_LOOP_PLAIN(ptmismatch)
+   SEQ_SCAN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c)
+   INDEX_SCAN(ptmismatch/public.ptmismatcha public.ptmismatcha_pkey
+    ptmismatch/public.ptmismatchb public.ptmismatchb_pkey)
+   PARTITIONWISE(pt1 ptmismatch)
+   NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c
+    ptmismatch/public.ptmismatcha ptmismatch/public.ptmismatchb)
+(22 rows)
+
+COMMIT;
diff --git a/contrib/pg_plan_advice/expected/scan.out b/contrib/pg_plan_advice/expected/scan.out
new file mode 100644 (file)
index 0000000..61f361f
--- /dev/null
@@ -0,0 +1,757 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+SET seq_page_cost = 0.1;
+SET random_page_cost = 0.1;
+SET cpu_tuple_cost = 0;
+SET cpu_index_tuple_cost = 0;
+CREATE TABLE scan_table (a int primary key, b text)
+       WITH (autovacuum_enabled = false);
+INSERT INTO scan_table
+       SELECT g, 'some text ' || g FROM generate_series(1, 100000) g;
+CREATE INDEX scan_table_b ON scan_table USING brin (b);
+VACUUM ANALYZE scan_table;
+-- Sequential scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+       QUERY PLAN        
+-------------------------
+ Seq Scan on scan_table
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(4 rows)
+
+-- Index scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(5 rows)
+
+-- Index-only scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+                      QUERY PLAN                      
+------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Generated Plan Advice:
+   INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(5 rows)
+
+-- Bitmap heap scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE b > 'some text 8';
+                     QUERY PLAN                     
+----------------------------------------------------
+ Bitmap Heap Scan on scan_table
+   Recheck Cond: (b > 'some text 8'::text)
+   ->  Bitmap Index Scan on scan_table_b
+         Index Cond: (b > 'some text 8'::text)
+ Generated Plan Advice:
+   BITMAP_HEAP_SCAN(scan_table public.scan_table_b)
+   NO_GATHER(scan_table)
+(7 rows)
+
+-- TID scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+            QUERY PLAN             
+-----------------------------------
+ Tid Scan on scan_table
+   TID Cond: (ctid = '(0,1)'::tid)
+ Generated Plan Advice:
+   TID_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(5 rows)
+
+-- TID range scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Tid Range Scan on scan_table
+   TID Cond: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid))
+ Generated Plan Advice:
+   TID_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(5 rows)
+
+-- Try forcing each of our test queries to use the scan type they
+-- wanted to use anyway. This should succeed.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+              QUERY PLAN              
+--------------------------------------
+ Seq Scan on scan_table
+ Supplied Plan Advice:
+   SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(6 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE b > 'some text 8';
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Bitmap Heap Scan on scan_table
+   Recheck Cond: (b > 'some text 8'::text)
+   ->  Bitmap Index Scan on scan_table_b
+         Index Cond: (b > 'some text 8'::text)
+ Supplied Plan Advice:
+   BITMAP_HEAP_SCAN(scan_table scan_table_b) /* matched */
+ Generated Plan Advice:
+   BITMAP_HEAP_SCAN(scan_table public.scan_table_b)
+   NO_GATHER(scan_table)
+(9 rows)
+
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+              QUERY PLAN              
+--------------------------------------
+ Tid Scan on scan_table
+   TID Cond: (ctid = '(0,1)'::tid)
+ Supplied Plan Advice:
+   TID_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+   TID_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Tid Range Scan on scan_table
+   TID Cond: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid))
+ Supplied Plan Advice:
+   TID_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+   TID_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+COMMIT;
+-- Try to force a full scan of the table to use some other scan type. All
+-- of these will fail. An index scan or bitmap heap scan could potentially
+-- generate the correct answer, but the planner does not even consider these
+-- possibilities due to the lack of a WHERE clause.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Seq Scan on scan_table
+   Disabled: true
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_pkey) /* matched, failed */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Seq Scan on scan_table
+   Disabled: true
+ Supplied Plan Advice:
+   INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Seq Scan on scan_table
+   Disabled: true
+ Supplied Plan Advice:
+   BITMAP_HEAP_SCAN(scan_table scan_table_pkey) /* matched, failed */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+                  QUERY PLAN                  
+----------------------------------------------
+ Seq Scan on scan_table
+   Disabled: true
+ Supplied Plan Advice:
+   TID_SCAN(scan_table) /* matched, failed */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+COMMIT;
+-- Try again to force index use. This should now succeed for the INDEX_SCAN
+-- and BITMAP_HEAP_SCAN, but the INDEX_ONLY_SCAN can't be forced because the
+-- query fetches columns not included in the index.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a > 0)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Seq Scan on scan_table
+   Disabled: true
+   Filter: (a > 0)
+ Supplied Plan Advice:
+   INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(8 rows)
+
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Bitmap Heap Scan on scan_table
+   Recheck Cond: (a > 0)
+   ->  Bitmap Index Scan on scan_table_pkey
+         Index Cond: (a > 0)
+ Supplied Plan Advice:
+   BITMAP_HEAP_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   BITMAP_HEAP_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(9 rows)
+
+COMMIT;
+-- We can force a primary key lookup to use a sequential scan, but we
+-- can't force it to use an index-only scan (due to the column list)
+-- or a TID scan (due to the absence of a TID qual).
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+              QUERY PLAN              
+--------------------------------------
+ Seq Scan on scan_table
+   Filter: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Disabled: true
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(8 rows)
+
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Disabled: true
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   TID_SCAN(scan_table) /* matched, failed */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(8 rows)
+
+COMMIT;
+-- We can forcibly downgrade an index-only scan to an index scan, but we can't
+-- force the use of an index that the planner thinks is inapplicable.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Seq Scan on scan_table
+   Disabled: true
+   Filter: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_b) /* matched, failed */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(8 rows)
+
+COMMIT;
+-- We can force the use of a sequential scan in place of a bitmap heap scan,
+-- but a plain index scan on a BRIN index is not possible.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE b > 'some text 8';
+              QUERY PLAN              
+--------------------------------------
+ Seq Scan on scan_table
+   Filter: (b > 'some text 8'::text)
+ Supplied Plan Advice:
+   SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Seq Scan on scan_table
+   Disabled: true
+   Filter: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_b) /* matched, failed */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(8 rows)
+
+COMMIT;
+-- We can force the use of a sequential scan rather than a TID scan or
+-- TID range scan.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+              QUERY PLAN              
+--------------------------------------
+ Seq Scan on scan_table
+   Filter: (ctid = '(0,1)'::tid)
+ Supplied Plan Advice:
+   SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Seq Scan on scan_table
+   Filter: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid))
+ Supplied Plan Advice:
+   SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+   SEQ_SCAN(scan_table)
+   NO_GATHER(scan_table)
+(7 rows)
+
+COMMIT;
+-- Test more complex scenarios with index scans.
+BEGIN;
+-- Should still work if we mention the schema.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+-- But not if we mention the wrong schema.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table cilbup.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                                 QUERY PLAN                                  
+-----------------------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table cilbup.scan_table_pkey) /* matched, inapplicable */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+-- It's OK to repeat the same advice.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+   INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(8 rows)
+
+-- But it doesn't work if the index target is even notionally different.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+                                 QUERY PLAN                                 
+----------------------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table scan_table_pkey) /* matched, conflicting */
+   INDEX_SCAN(scan_table public.scan_table_pkey) /* matched, conflicting */
+ Generated Plan Advice:
+   INDEX_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(8 rows)
+
+COMMIT;
+-- Test assorted incorrect advice.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(nothing)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+                      QUERY PLAN                      
+------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(nothing) /* not matched */
+ Generated Plan Advice:
+   INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(nothing whatsoever)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+                      QUERY PLAN                      
+------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(nothing whatsoever) /* not matched */
+ Generated Plan Advice:
+   INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table bogus)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_SCAN(scan_table bogus) /* matched, inapplicable, failed */
+ Generated Plan Advice:
+   INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(nothing whatsoever)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_ONLY_SCAN(nothing whatsoever) /* not matched */
+ Generated Plan Advice:
+   INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table bogus)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+                           QUERY PLAN                            
+-----------------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   INDEX_ONLY_SCAN(scan_table bogus) /* matched, inapplicable */
+ Generated Plan Advice:
+   INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+   NO_GATHER(scan_table)
+(7 rows)
+
+COMMIT;
+-- Test our ability to refer to multiple instances of the same alias.
+BEGIN;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Nested Loop Left Join
+   ->  Nested Loop Left Join
+         ->  Function Scan on generate_series g
+         ->  Index Scan using scan_table_pkey on scan_table s
+               Index Cond: (a = g.g)
+   ->  Index Scan using scan_table_pkey on scan_table s_1
+         Index Cond: (a = g.g)
+ Generated Plan Advice:
+   JOIN_ORDER(g s s#2)
+   NESTED_LOOP_PLAIN(s s#2)
+   INDEX_SCAN(s public.scan_table_pkey s#2 public.scan_table_pkey)
+   NO_GATHER(g s s#2)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Nested Loop Left Join
+   ->  Hash Left Join
+         Hash Cond: (g.g = s.a)
+         ->  Function Scan on generate_series g
+         ->  Hash
+               ->  Seq Scan on scan_table s
+   ->  Index Scan using scan_table_pkey on scan_table s_1
+         Index Cond: (a = g.g)
+ Supplied Plan Advice:
+   SEQ_SCAN(s) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(g s s#2)
+   NESTED_LOOP_PLAIN(s#2)
+   HASH_JOIN(s)
+   SEQ_SCAN(s)
+   INDEX_SCAN(s#2 public.scan_table_pkey)
+   NO_GATHER(g s s#2)
+(17 rows)
+
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s#2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Hash Left Join
+   Hash Cond: (g.g = s_1.a)
+   ->  Nested Loop Left Join
+         ->  Function Scan on generate_series g
+         ->  Index Scan using scan_table_pkey on scan_table s
+               Index Cond: (a = g.g)
+   ->  Hash
+         ->  Seq Scan on scan_table s_1
+ Supplied Plan Advice:
+   SEQ_SCAN(s#2) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(g s s#2)
+   NESTED_LOOP_PLAIN(s)
+   HASH_JOIN(s#2)
+   SEQ_SCAN(s#2)
+   INDEX_SCAN(s public.scan_table_pkey)
+   NO_GATHER(g s s#2)
+(17 rows)
+
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s) SEQ_SCAN(s#2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+                   QUERY PLAN                   
+------------------------------------------------
+ Hash Left Join
+   Hash Cond: (g.g = s_1.a)
+   ->  Hash Left Join
+         Hash Cond: (g.g = s.a)
+         ->  Function Scan on generate_series g
+         ->  Hash
+               ->  Seq Scan on scan_table s
+   ->  Hash
+         ->  Seq Scan on scan_table s_1
+ Supplied Plan Advice:
+   SEQ_SCAN(s) /* matched */
+   SEQ_SCAN(s#2) /* matched */
+ Generated Plan Advice:
+   JOIN_ORDER(g s s#2)
+   HASH_JOIN(s s#2)
+   SEQ_SCAN(s s#2)
+   NO_GATHER(g s s#2)
+(17 rows)
+
+COMMIT;
+-- Test our ability to refer to scans within a subquery.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+   Index Cond: (a = 1)
+ Generated Plan Advice:
+   INDEX_SCAN(s@x public.scan_table_pkey)
+   NO_GATHER(x s@x)
+(5 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+   Index Cond: (a = 1)
+ Generated Plan Advice:
+   INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey)
+   NO_GATHER(unnamed_subquery s@unnamed_subquery)
+(5 rows)
+
+BEGIN;
+-- Should not match.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(s) /* not matched */
+ Generated Plan Advice:
+   INDEX_SCAN(s@x public.scan_table_pkey)
+   NO_GATHER(x s@x)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(s) /* not matched */
+ Generated Plan Advice:
+   INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey)
+   NO_GATHER(unnamed_subquery s@unnamed_subquery)
+(7 rows)
+
+-- Should match first query only.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@x)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+          QUERY PLAN           
+-------------------------------
+ Seq Scan on scan_table s
+   Filter: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(s@x) /* matched */
+ Generated Plan Advice:
+   SEQ_SCAN(s@x)
+   NO_GATHER(x s@x)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(s@x) /* not matched */
+ Generated Plan Advice:
+   INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey)
+   NO_GATHER(unnamed_subquery s@unnamed_subquery)
+(7 rows)
+
+-- Should match second query only.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@unnamed_subquery)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+   Index Cond: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(s@unnamed_subquery) /* not matched */
+ Generated Plan Advice:
+   INDEX_SCAN(s@x public.scan_table_pkey)
+   NO_GATHER(x s@x)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+                    QUERY PLAN                    
+--------------------------------------------------
+ Seq Scan on scan_table s
+   Filter: (a = 1)
+ Supplied Plan Advice:
+   SEQ_SCAN(s@unnamed_subquery) /* matched */
+ Generated Plan Advice:
+   SEQ_SCAN(s@unnamed_subquery)
+   NO_GATHER(unnamed_subquery s@unnamed_subquery)
+(7 rows)
+
+COMMIT;
diff --git a/contrib/pg_plan_advice/expected/syntax.out b/contrib/pg_plan_advice/expected/syntax.out
new file mode 100644 (file)
index 0000000..dddb12c
--- /dev/null
@@ -0,0 +1,59 @@
+LOAD 'pg_plan_advice';
+-- An empty string is allowed, and so is an empty target list.
+SET pg_plan_advice.advice = '';
+SET pg_plan_advice.advice = 'SEQ_SCAN()';
+-- Test assorted variations in capitalization, whitespace, and which parts of
+-- the relation identifier are included. These should all work.
+SET pg_plan_advice.advice = 'SEQ_SCAN(x)';
+SET pg_plan_advice.advice = 'seq_scan(x@y)';
+SET pg_plan_advice.advice = 'SEQ_scan(x#2)';
+SET pg_plan_advice.advice = 'SEQ_SCAN (x/y)';
+SET pg_plan_advice.advice = '  SEQ_SCAN ( x / y . z )  ';
+SET pg_plan_advice.advice = 'SEQ_SCAN("x"#2/"y"."z"@"t")';
+-- Syntax errors.
+SET pg_plan_advice.advice = 'SEQUENTIAL_SCAN(x)';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "SEQUENTIAL_SCAN(x)"
+DETAIL:  Could not parse advice: syntax error at or near "SEQUENTIAL_SCAN"
+SET pg_plan_advice.advice = 'SEQ_SCAN';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN"
+DETAIL:  Could not parse advice: syntax error at end of input
+SET pg_plan_advice.advice = 'SEQ_SCAN(';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN("
+DETAIL:  Could not parse advice: syntax error at end of input
+SET pg_plan_advice.advice = 'SEQ_SCAN("';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN(""
+DETAIL:  Could not parse advice: unterminated quoted identifier at end of input
+SET pg_plan_advice.advice = 'SEQ_SCAN(#';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN(#"
+DETAIL:  Could not parse advice: syntax error at or near "#"
+SET pg_plan_advice.advice = '()';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "()"
+DETAIL:  Could not parse advice: syntax error at or near "("
+SET pg_plan_advice.advice = '123';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "123"
+DETAIL:  Could not parse advice: syntax error at or near "123"
+-- Legal comments.
+SET pg_plan_advice.advice = '/**/';
+SET pg_plan_advice.advice = 'HASH_JOIN(_)/***/';
+SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(/*x*/y)';
+SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(y//*x*/z)';
+-- Unterminated comments.
+SET pg_plan_advice.advice = '/*';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "/*"
+DETAIL:  Could not parse advice: unterminated comment at end of input
+SET pg_plan_advice.advice = 'JOIN_ORDER("fOO") /* oops';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "JOIN_ORDER("fOO") /* oops"
+DETAIL:  Could not parse advice: unterminated comment at end of input
+-- Nested comments are not supported, so the first of these is legal and
+-- the second is not.
+SET pg_plan_advice.advice = '/*/*/';
+SET pg_plan_advice.advice = '/*/* stuff */*/';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "/*/* stuff */*/"
+DETAIL:  Could not parse advice: syntax error at or near "*"
+-- Foreign join requires multiple relation identifiers.
+SET pg_plan_advice.advice = 'FOREIGN_JOIN(a)';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "FOREIGN_JOIN(a)"
+DETAIL:  Could not parse advice: FOREIGN_JOIN targets must contain more than one relation identifier at or near ")"
+SET pg_plan_advice.advice = 'FOREIGN_JOIN((a))';
+ERROR:  invalid value for parameter "pg_plan_advice.advice": "FOREIGN_JOIN((a))"
+DETAIL:  Could not parse advice: FOREIGN_JOIN targets must contain more than one relation identifier at or near ")"
diff --git a/contrib/pg_plan_advice/meson.build b/contrib/pg_plan_advice/meson.build
new file mode 100644 (file)
index 0000000..3452e5a
--- /dev/null
@@ -0,0 +1,70 @@
+# Copyright (c) 2022-2024, PostgreSQL Global Development Group
+
+# Hand-written C sources that make up the module.
+pg_plan_advice_sources = files(
+  'pg_plan_advice.c',
+  'pgpa_ast.c',
+  'pgpa_collector.c',
+  'pgpa_identifier.c',
+  'pgpa_join.c',
+  'pgpa_output.c',
+  'pgpa_planner.c',
+  'pgpa_scan.c',
+  'pgpa_trove.c',
+  'pgpa_walker.c',
+)
+
+# Generate pgpa_scanner.c from the flex input and compile it into the module.
+pgpa_scanner = custom_target('pgpa_scanner',
+  input: 'pgpa_scanner.l',
+  output: 'pgpa_scanner.c',
+  command: flex_cmd,
+)
+generated_sources += pgpa_scanner
+pg_plan_advice_sources += pgpa_scanner
+
+# Generate the parser from the bison grammar and compile it into the module.
+pgpa_parser = custom_target('pgpa_parser',
+  input: 'pgpa_parser.y',
+  kwargs: bison_kw,
+)
+generated_sources += pgpa_parser.to_list()
+pg_plan_advice_sources += pgpa_parser
+
+# On Windows, also build a version resource describing the library.
+if host_system == 'windows'
+  pg_plan_advice_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+    '--NAME', 'pg_plan_advice',
+    '--FILEDESC', 'pg_plan_advice - help the planner get the right plan',])
+endif
+
+# The loadable module itself; '.' is on the include path so that the
+# sources in this directory can find the module's own headers.
+pg_plan_advice = shared_module('pg_plan_advice',
+  pg_plan_advice_sources,
+  include_directories: include_directories('.'),
+  kwargs: contrib_mod_args,
+)
+contrib_targets += pg_plan_advice
+
+# Extension script and control file.
+install_data(
+  'pg_plan_advice--1.0.sql',
+  'pg_plan_advice.control',
+  kwargs: contrib_data_args,
+)
+
+# Regression tests ('regress') and TAP tests ('tap') for this module.
+tests += {
+  'name': 'pg_plan_advice',
+  'sd': meson.current_source_dir(),
+  'bd': meson.current_build_dir(),
+  'regress': {
+    'sql': [
+      'gather',
+      'join_order',
+      'join_strategy',
+      'local_collector',
+      'partitionwise',
+      'scan',
+      'syntax',
+    ],
+  },
+  'tap': {
+    'tests': [
+      't/001_regress.pl',
+    ],
+  },
+}
diff --git a/contrib/pg_plan_advice/pg_plan_advice--1.0.sql b/contrib/pg_plan_advice/pg_plan_advice--1.0.sql
new file mode 100644 (file)
index 0000000..29f4f22
--- /dev/null
@@ -0,0 +1,42 @@
+/* contrib/pg_plan_advice/pg_plan_advice--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pg_plan_advice" to load this file. \quit
+
+-- Functions for discarding collected advice.  The "local" and "shared"
+-- variants presumably correspond to the collections governed by
+-- pg_plan_advice.local_collection_limit (per-backend memory) and
+-- pg_plan_advice.shared_collection_limit (shared memory), respectively.
+CREATE FUNCTION pg_clear_collected_local_advice()
+RETURNS void
+AS 'MODULE_PATHNAME', 'pg_clear_collected_local_advice'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION pg_clear_collected_shared_advice()
+RETURNS void
+AS 'MODULE_PATHNAME', 'pg_clear_collected_shared_advice'
+LANGUAGE C STRICT;
+
+-- Set-returning functions exposing collected advice.  Both return the same
+-- columns; each row carries the user, database, and query the advice was
+-- collected for, along with the advice text itself.
+CREATE FUNCTION pg_get_collected_local_advice(
+       OUT id bigint,
+       OUT userid oid,
+       OUT dbid oid,
+       OUT queryid bigint,
+       OUT collection_time timestamptz,
+       OUT query text,
+       OUT advice text
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_collected_local_advice'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION pg_get_collected_shared_advice(
+       OUT id bigint,
+       OUT userid oid,
+       OUT dbid oid,
+       OUT queryid bigint,
+       OUT collection_time timestamptz,
+       OUT query text,
+       OUT advice text
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_collected_shared_advice'
+LANGUAGE C STRICT;
+
+-- The shared collection is not private to the calling session (its rows
+-- carry userid and dbid, and its size limit is a superuser-only setting),
+-- so by default ordinary users may neither read it nor clear it.  Access
+-- can be granted explicitly where appropriate.
+REVOKE ALL ON FUNCTION pg_clear_collected_shared_advice() FROM PUBLIC;
+REVOKE ALL ON FUNCTION pg_get_collected_shared_advice() FROM PUBLIC;
diff --git a/contrib/pg_plan_advice/pg_plan_advice.c b/contrib/pg_plan_advice/pg_plan_advice.c
new file mode 100644 (file)
index 0000000..f32e8b7
--- /dev/null
@@ -0,0 +1,454 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_plan_advice.c
+ *       main entrypoints for generating and applying planner advice
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pg_plan_advice.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pg_plan_advice.h"
+#include "pgpa_ast.h"
+#include "pgpa_collector.h"
+#include "pgpa_identifier.h"
+#include "pgpa_output.h"
+#include "pgpa_planner.h"
+#include "pgpa_trove.h"
+#include "pgpa_walker.h"
+
+#include "commands/defrem.h"
+#include "commands/explain.h"
+#include "commands/explain_format.h"
+#include "commands/explain_state.h"
+#include "funcapi.h"
+#include "optimizer/planner.h"
+#include "storage/dsm_registry.h"
+#include "utils/guc.h"
+
+PG_MODULE_MAGIC;
+
+static pgpa_shared_state *pgpa_state = NULL;
+static dsa_area *pgpa_dsa_area = NULL;
+
+/* GUC variables */
+char      *pg_plan_advice_advice = NULL;
+static bool pg_plan_advice_always_explain_supplied_advice = true;
+int                    pg_plan_advice_local_collection_limit = 0;
+int                    pg_plan_advice_shared_collection_limit = 0;
+
+/* Saved hook value */
+static explain_per_plan_hook_type prev_explain_per_plan = NULL;
+
+/* Other file-level globals */
+static int     es_extension_id;
+static MemoryContext pgpa_memory_context = NULL;
+
+static void pg_plan_advice_explain_option_handler(ExplainState *es,
+                                                                                                 DefElem *opt,
+                                                                                                 ParseState *pstate);
+static void pg_plan_advice_explain_per_plan_hook(PlannedStmt *plannedstmt,
+                                                                                                IntoClause *into,
+                                                                                                ExplainState *es,
+                                                                                                const char *queryString,
+                                                                                                ParamListInfo params,
+                                                                                                QueryEnvironment *queryEnv);
+static bool pg_plan_advice_advice_check_hook(char **newval, void **extra,
+                                                                                        GucSource source);
+static DefElem *find_defelem_by_defname(List *deflist, char *defname);
+
+/*
+ * Initialize this module.
+ *
+ * Defines the pg_plan_advice.* GUCs and reserves that prefix, obtains an ID
+ * for storing per-ExplainState data, registers the PLAN_ADVICE option for
+ * EXPLAIN, and installs the planner and EXPLAIN hooks. Any previously
+ * installed explain_per_plan_hook is saved so that it can be chained to.
+ */
+void
+_PG_init(void)
+{
+       DefineCustomStringVariable("pg_plan_advice.advice",
+                                                          "advice to apply during query planning",
+                                                          NULL,
+                                                          &pg_plan_advice_advice,
+                                                          NULL,
+                                                          PGC_USERSET,
+                                                          0,
+                                                          pg_plan_advice_advice_check_hook,
+                                                          NULL,
+                                                          NULL);
+
+       DefineCustomBoolVariable("pg_plan_advice.always_explain_supplied_advice",
+                                                        "EXPLAIN output includes supplied advice even without EXPLAIN (PLAN_ADVICE)",
+                                                        NULL,
+                                                        &pg_plan_advice_always_explain_supplied_advice,
+                                                        true,
+                                                        PGC_USERSET,
+                                                        0,
+                                                        NULL,
+                                                        NULL,
+                                                        NULL);
+
+       DefineCustomIntVariable("pg_plan_advice.local_collection_limit",
+                                                       "# of advice entries to retain in per-backend memory",
+                                                       NULL,
+                                                       &pg_plan_advice_local_collection_limit,
+                                                       0,
+                                                       0, INT_MAX,
+                                                       PGC_USERSET,
+                                                       0,
+                                                       NULL,
+                                                       NULL,
+                                                       NULL);
+
+       DefineCustomIntVariable("pg_plan_advice.shared_collection_limit",
+                                                       "# of advice entries to retain in shared memory",
+                                                       NULL,
+                                                       &pg_plan_advice_shared_collection_limit,
+                                                       0,
+                                                       0, INT_MAX,
+                                                       PGC_SUSET,
+                                                       0,
+                                                       NULL,
+                                                       NULL,
+                                                       NULL);
+
+       MarkGUCPrefixReserved("pg_plan_advice");
+
+       /* Get an ID that we can use to cache data in an ExplainState. */
+       es_extension_id = GetExplainExtensionId("pg_plan_advice");
+
+       /* Register the new EXPLAIN options implemented by this module. */
+       RegisterExtensionExplainOption("plan_advice",
+                                                                  pg_plan_advice_explain_option_handler);
+
+       /* Install hooks */
+       pgpa_planner_install_hooks();
+       prev_explain_per_plan = explain_per_plan_hook;
+       explain_per_plan_hook = pg_plan_advice_explain_per_plan_hook;
+}
+
+/*
+ * One-time initializer for our shared state.
+ *
+ * This is handed to GetNamedDSMSegment as the initialization callback. It
+ * sets up the lock, allocates the LWLock tranche IDs we need, and marks the
+ * DSA area and the shared collector as not yet existing.
+ */
+static void
+pgpa_init_shared_state(void *ptr)
+{
+       pgpa_shared_state *shared = (pgpa_shared_state *) ptr;
+       int                     lock_tranche_id;
+
+       /* Allocate the lock's tranche ID first, then the one for the DSA. */
+       lock_tranche_id = LWLockNewTrancheId("pg_plan_advice_lock");
+       LWLockInitialize(&shared->lock, lock_tranche_id);
+       shared->dsa_tranche = LWLockNewTrancheId("pg_plan_advice_dsa");
+
+       /* Nothing has been created in dynamic shared memory yet. */
+       shared->shared_collector = InvalidDsaPointer;
+       shared->area = DSA_HANDLE_INVALID;
+}
+
+/*
+ * Get the memory context used for this module's long-lived data.
+ *
+ * The context is created as a child of TopMemoryContext on first use; every
+ * later call returns that same context.
+ */
+MemoryContext
+pg_plan_advice_get_mcxt(void)
+{
+       /* Fast path: the context already exists. */
+       if (pgpa_memory_context != NULL)
+               return pgpa_memory_context;
+
+       pgpa_memory_context =
+               AllocSetContextCreate(TopMemoryContext,
+                                                         "pg_plan_advice",
+                                                         ALLOCSET_DEFAULT_SIZES);
+
+       return pgpa_memory_context;
+}
+
+/*
+ * Return this backend's pointer to the module's shared state.
+ *
+ * The first call in a backend looks up the named DSM segment
+ * "pg_plan_advice"; if the segment does not exist yet it is created and
+ * initialized by pgpa_init_shared_state, and otherwise we attach to the
+ * existing one. Subsequent calls simply return the cached pointer.
+ */
+pgpa_shared_state *
+pg_plan_advice_attach(void)
+{
+       bool            found;
+
+       /* Already looked up in this backend? */
+       if (pgpa_state != NULL)
+               return pgpa_state;
+
+       pgpa_state = GetNamedDSMSegment("pg_plan_advice",
+                                                                       sizeof(pgpa_shared_state),
+                                                                       pgpa_init_shared_state,
+                                                                       &found);
+
+       return pgpa_state;
+}
+
+/*
+ * Get this backend's handle on pg_plan_advice's DSA area.
+ *
+ * On the first call in a backend, the area is either created (if no backend
+ * has done so yet, in which case it is pinned and its handle is published
+ * in the shared state) or attached to using the published handle. Either
+ * way, the mapping is pinned and the resulting pointer is cached, so later
+ * calls return immediately.
+ */
+dsa_area *
+pg_plan_advice_dsa_area(void)
+{
+       pgpa_shared_state *shared;
+       MemoryContext caller_context;
+       dsa_handle      handle;
+
+       /* Fast path: this backend already has the area mapped. */
+       if (pgpa_dsa_area != NULL)
+               return pgpa_dsa_area;
+
+       shared = pg_plan_advice_attach();
+
+       /* Run the create/attach calls in our long-lived context. */
+       caller_context = MemoryContextSwitchTo(pg_plan_advice_get_mcxt());
+
+       LWLockAcquire(&shared->lock, LW_EXCLUSIVE);
+       handle = shared->area;
+       if (handle != DSA_HANDLE_INVALID)
+       {
+               /* Somebody else created it; the lock isn't needed to attach. */
+               LWLockRelease(&shared->lock);
+               pgpa_dsa_area = dsa_attach(handle);
+       }
+       else
+       {
+               /* We're first: create, pin, and publish while holding the lock. */
+               pgpa_dsa_area = dsa_create(shared->dsa_tranche);
+               dsa_pin(pgpa_dsa_area);
+               shared->area = dsa_get_handle(pgpa_dsa_area);
+               LWLockRelease(&shared->lock);
+       }
+
+       dsa_pin_mapping(pgpa_dsa_area);
+
+       MemoryContextSwitchTo(caller_context);
+
+       return pgpa_dsa_area;
+}
+
+/*
+ * Handler for EXPLAIN (PLAN_ADVICE).
+ *
+ * The option's boolean value is remembered in a palloc'd bool attached to
+ * the ExplainState under our extension ID; the bool is allocated the first
+ * time the option is seen for a given ExplainState.
+ */
+static void
+pg_plan_advice_explain_option_handler(ExplainState *es, DefElem *opt,
+                                                                         ParseState *pstate)
+{
+       bool       *enabled = GetExplainExtensionState(es, es_extension_id);
+
+       /* No state attached yet, so create it. */
+       if (!enabled)
+       {
+               enabled = palloc0_object(bool);
+               SetExplainExtensionState(es, es_extension_id, enabled);
+       }
+
+       *enabled = defGetBoolean(opt);
+}
+
+/*
+ * Display a string that is likely to consist of multiple lines in EXPLAIN
+ * output.
+ *
+ * 'qlabel' is the property label and 'value' is the (possibly multi-line)
+ * text to show. For non-text formats, the value is emitted as an ordinary
+ * text property. In text format, the label is printed on a line of its own
+ * and each line of the value is indented one level beneath it; an empty
+ * value produces no output at all.
+ */
+static void
+pg_plan_advice_explain_text_multiline(ExplainState *es, char *qlabel,
+                                                                         char *value)
+{
+       char       *s;
+
+       /* For non-text formats, it's best not to add any special handling. */
+       if (es->format != EXPLAIN_FORMAT_TEXT)
+       {
+               ExplainPropertyText(qlabel, value, es);
+               return;
+       }
+
+       /*
+        * In text format, if there is no data, display nothing. It's the value
+        * that must be tested here, not the label: otherwise, an empty value
+        * would yield a label with nothing underneath it.
+        */
+       if (*value == '\0')
+               return;
+
+       /*
+        * It looks nicest to indent each line of the advice separately, beginning
+        * on the line below the label.
+        */
+       ExplainIndentText(es);
+       appendStringInfo(es->str, "%s:\n", qlabel);
+       es->indent++;
+       while ((s = strchr(value, '\n')) != NULL)
+       {
+               ExplainIndentText(es);
+               /* Copy this line, including its terminating newline. */
+               appendBinaryStringInfo(es->str, value, (s - value) + 1);
+               value = s + 1;
+       }
+
+       /* Don't interpret a terminal newline as a request for an empty line. */
+       if (*value != '\0')
+       {
+               ExplainIndentText(es);
+               appendStringInfo(es->str, "%s\n", value);
+       }
+
+       es->indent--;
+}
+
+/*
+ * Add advice feedback to the EXPLAIN output.
+ *
+ * 'feedback' is a list of DefElem nodes, one per item of supplied advice:
+ * defname is the advice text and the integer argument is a mask of
+ * PGPA_TE_* flags. Each item becomes one line consisting of the advice
+ * followed by a comment describing how it fared, and the result is shown
+ * under the "Supplied Plan Advice" label.
+ */
+static void
+pg_plan_advice_explain_feedback(ExplainState *es, List *feedback)
+{
+       StringInfoData text;
+       ListCell   *lc;
+
+       initStringInfo(&text);
+
+       foreach(lc, feedback)
+       {
+               DefElem    *entry = lfirst_node(DefElem, lc);
+               int                     flags = defGetInt32(entry);
+               const char *match;
+
+               /* A full match is expected to imply a partial match as well. */
+               if (flags & PGPA_TE_MATCH_FULL)
+               {
+                       Assert((flags & PGPA_TE_MATCH_PARTIAL) != 0);
+                       match = "matched";
+               }
+               else if (flags & PGPA_TE_MATCH_PARTIAL)
+                       match = "partially matched";
+               else
+                       match = "not matched";
+
+               appendStringInfo(&text, "%s /* %s", entry->defname, match);
+
+               /* Any number of these additional remarks may apply. */
+               if (flags & PGPA_TE_INAPPLICABLE)
+                       appendStringInfoString(&text, ", inapplicable");
+               if (flags & PGPA_TE_CONFLICTING)
+                       appendStringInfoString(&text, ", conflicting");
+               if (flags & PGPA_TE_FAILED)
+                       appendStringInfoString(&text, ", failed");
+
+               appendStringInfoString(&text, " */\n");
+       }
+
+       pg_plan_advice_explain_text_multiline(es, "Supplied Plan Advice",
+                                                                                 text.data);
+}
+
+/*
+ * Add relevant details, if any, to the EXPLAIN output for a single plan.
+ *
+ * Any previously installed explain_per_plan_hook is called first. After
+ * that, up to two sections may be emitted: "Supplied Plan Advice", built
+ * from the "feedback" entry stashed in plannedstmt->extension_state, and
+ * "Generated Plan Advice", which is either the stashed "advice_string" or,
+ * failing that, advice computed here by walking the finished plan.
+ */
+static void
+pg_plan_advice_explain_per_plan_hook(PlannedStmt *plannedstmt,
+                                                                        IntoClause *into,
+                                                                        ExplainState *es,
+                                                                        const char *queryString,
+                                                                        ParamListInfo params,
+                                                                        QueryEnvironment *queryEnv)
+{
+       bool       *plan_advice = GetExplainExtensionState(es, es_extension_id);
+       DefElem    *pgpa_item;
+       List       *pgpa_list;
+
+       if (prev_explain_per_plan)
+               prev_explain_per_plan(plannedstmt, into, es, queryString, params,
+                                                         queryEnv);
+
+       /* Find any data pgpa_planner_shutdown stashed in the PlannedStmt. */
+       pgpa_item = find_defelem_by_defname(plannedstmt->extension_state,
+                                                                               "pg_plan_advice");
+       pgpa_list = pgpa_item == NULL ? NULL : (List *) pgpa_item->arg;
+
+       /*
+        * By default, if there is a record of attempting to apply advice during
+        * query planning, we always output that information, but the user can set
+        * pg_plan_advice.always_explain_supplied_advice = false to suppress that
+        * behavior. If they do, we'll only display it when the PLAN_ADVICE option
+        * was specified and not set to false.
+        *
+        * NB: If we're explaining a query planned beforehand -- i.e. a prepared
+        * statement -- the application of query advice may not have been
+        * recorded, and therefore this won't be able to show anything.
+        */
+       if (pgpa_list != NULL && (pg_plan_advice_always_explain_supplied_advice ||
+                                                         (plan_advice != NULL && *plan_advice)))
+       {
+               DefElem    *feedback;
+
+               feedback = find_defelem_by_defname(pgpa_list, "feedback");
+               if (feedback != NULL)
+                       pg_plan_advice_explain_feedback(es, (List *) feedback->arg);
+       }
+
+       /*
+        * If the PLAN_ADVICE option was specified -- and not set to FALSE --
+        * show generated advice.
+        */
+       if (plan_advice != NULL && *plan_advice)
+       {
+               DefElem    *advice_string_item;
+               char       *advice_string;
+
+               advice_string_item =
+                       find_defelem_by_defname(pgpa_list, "advice_string");
+               if (advice_string_item != NULL)
+               {
+                       /* Advice has already been generated; we can reuse it. */
+                       advice_string = strVal(advice_string_item->arg);
+               }
+               else
+               {
+                       pgpa_plan_walker_context walker;
+                       StringInfoData buf;
+                       pgpa_identifier *rt_identifiers;
+
+                       /* Advice not yet generated; do that now. */
+                       pgpa_plan_walker(&walker, plannedstmt);
+                       rt_identifiers =
+                               pgpa_create_identifiers_for_planned_stmt(plannedstmt);
+                       initStringInfo(&buf);
+                       pgpa_output_advice(&buf, &walker, rt_identifiers);
+                       advice_string = buf.data;
+               }
+
+               if (advice_string[0] != '\0')
+                       pg_plan_advice_explain_text_multiline(es, "Generated Plan Advice",
+                                                                                                 advice_string);
+       }
+}
+
+/*
+ * Check hook for pg_plan_advice.advice
+ *
+ * A NULL value is always accepted. Otherwise, the value is accepted only if
+ * it can be parsed as plan advice; if it can't, the parser's complaint is
+ * reported via GUC_check_errdetail and we return false.
+ *
+ * Parsing is done in a temporary memory context. That context is deleted,
+ * and the caller's context restored, on both the success and failure paths.
+ */
+static bool
+pg_plan_advice_advice_check_hook(char **newval, void **extra, GucSource source)
+{
+       MemoryContext oldcontext;
+       MemoryContext tmpcontext;
+       char       *error;
+       bool            ok;
+
+       if (*newval == NULL)
+               return true;
+
+       tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
+                                                                          "pg_plan_advice.advice",
+                                                                          ALLOCSET_DEFAULT_SIZES);
+       oldcontext = MemoryContextSwitchTo(tmpcontext);
+
+       /*
+        * It would be nice to save the parse tree that we construct here for
+        * eventual use when planning with this advice, but *extra can only point
+        * to a single guc_malloc'd chunk, and our parse tree involves an
+        * arbitrary number of memory allocations.
+        */
+       (void) pgpa_parse(*newval, &error);
+
+       /*
+        * Report any failure before cleaning up, since the error string may well
+        * have been allocated in the temporary context. The formatted detail
+        * message itself is not stored there (format_elog_string builds it in
+        * ErrorContext), so it survives the context deletion below.
+        */
+       ok = (error == NULL);
+       if (!ok)
+               GUC_check_errdetail("Could not parse advice: %s", error);
+
+       /* Clean up regardless of the outcome. */
+       MemoryContextSwitchTo(oldcontext);
+       MemoryContextDelete(tmpcontext);
+
+       return ok;
+}
+
+/*
+ * Look up a DefElem by name.
+ *
+ * Returns the first element of 'deflist' whose defname is exactly equal to
+ * 'defname', or NULL if there is no such element (which is always the case
+ * for an empty list).
+ */
+static DefElem *
+find_defelem_by_defname(List *deflist, char *defname)
+{
+       ListCell   *lc;
+
+       foreach(lc, deflist)
+       {
+               DefElem    *candidate = lfirst_node(DefElem, lc);
+
+               if (strcmp(candidate->defname, defname) != 0)
+                       continue;
+
+               return candidate;
+       }
+
+       return NULL;
+}
diff --git a/contrib/pg_plan_advice/pg_plan_advice.control b/contrib/pg_plan_advice/pg_plan_advice.control
new file mode 100644 (file)
index 0000000..aa6fdc9
--- /dev/null
@@ -0,0 +1,5 @@
+# pg_plan_advice extension
+comment = 'help the planner get the right plan'
+default_version = '1.0'
+module_pathname = '$libdir/pg_plan_advice'
+relocatable = true
diff --git a/contrib/pg_plan_advice/pg_plan_advice.h b/contrib/pg_plan_advice/pg_plan_advice.h
new file mode 100644 (file)
index 0000000..86efb3b
--- /dev/null
@@ -0,0 +1,37 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_plan_advice.h
+ *       main header file for pg_plan_advice contrib module
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pg_plan_advice.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_PLAN_ADVICE_H
+#define PG_PLAN_ADVICE_H
+
+#include "nodes/plannodes.h"
+#include "storage/lwlock.h"
+#include "utils/dsa.h"
+
+typedef struct pgpa_shared_state
+{
+       LWLock          lock;                   /* held while examining or setting "area" */
+       int                     dsa_tranche;    /* LWLock tranche ID passed to dsa_create */
+       dsa_handle      area;                   /* DSA_HANDLE_INVALID until area is created */
+       dsa_pointer shared_collector;   /* initially InvalidDsaPointer */
+} pgpa_shared_state;
+
+/* GUC variables */
+extern int     pg_plan_advice_local_collection_limit;
+extern int     pg_plan_advice_shared_collection_limit;
+extern char *pg_plan_advice_advice;
+
+/* Function prototypes */
+extern MemoryContext pg_plan_advice_get_mcxt(void);
+extern pgpa_shared_state *pg_plan_advice_attach(void);
+extern dsa_area *pg_plan_advice_dsa_area(void);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_ast.c b/contrib/pg_plan_advice/pgpa_ast.c
new file mode 100644 (file)
index 0000000..02ffbfa
--- /dev/null
@@ -0,0 +1,392 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_ast.c
+ *       additional supporting code related to plan advice parsing
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_ast.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pgpa_ast.h"
+
+#include "funcapi.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+
+static bool pgpa_identifiers_cover_target(int nrids, pgpa_identifier *rids,
+                                                                                 pgpa_advice_target *target,
+                                                                                 bool *rids_used);
+
+/*
+ * Map an advice tag to its textual, upper-case spelling.
+ *
+ * The result is a string constant and must not be modified or freed. Every
+ * tag value handled below has a name; being passed anything else is treated
+ * as unreachable.
+ */
+char *
+pgpa_cstring_advice_tag(pgpa_advice_tag_type advice_tag)
+{
+       char       *name = NULL;
+
+       switch (advice_tag)
+       {
+               case PGPA_TAG_BITMAP_HEAP_SCAN:
+                       name = "BITMAP_HEAP_SCAN";
+                       break;
+               case PGPA_TAG_FOREIGN_JOIN:
+                       name = "FOREIGN_JOIN";
+                       break;
+               case PGPA_TAG_GATHER:
+                       name = "GATHER";
+                       break;
+               case PGPA_TAG_GATHER_MERGE:
+                       name = "GATHER_MERGE";
+                       break;
+               case PGPA_TAG_HASH_JOIN:
+                       name = "HASH_JOIN";
+                       break;
+               case PGPA_TAG_INDEX_ONLY_SCAN:
+                       name = "INDEX_ONLY_SCAN";
+                       break;
+               case PGPA_TAG_INDEX_SCAN:
+                       name = "INDEX_SCAN";
+                       break;
+               case PGPA_TAG_JOIN_ORDER:
+                       name = "JOIN_ORDER";
+                       break;
+               case PGPA_TAG_MERGE_JOIN_MATERIALIZE:
+                       name = "MERGE_JOIN_MATERIALIZE";
+                       break;
+               case PGPA_TAG_MERGE_JOIN_PLAIN:
+                       name = "MERGE_JOIN_PLAIN";
+                       break;
+               case PGPA_TAG_NESTED_LOOP_MATERIALIZE:
+                       name = "NESTED_LOOP_MATERIALIZE";
+                       break;
+               case PGPA_TAG_NESTED_LOOP_MEMOIZE:
+                       name = "NESTED_LOOP_MEMOIZE";
+                       break;
+               case PGPA_TAG_NESTED_LOOP_PLAIN:
+                       name = "NESTED_LOOP_PLAIN";
+                       break;
+               case PGPA_TAG_NO_GATHER:
+                       name = "NO_GATHER";
+                       break;
+               case PGPA_TAG_PARTITIONWISE:
+                       name = "PARTITIONWISE";
+                       break;
+               case PGPA_TAG_SEMIJOIN_NON_UNIQUE:
+                       name = "SEMIJOIN_NON_UNIQUE";
+                       break;
+               case PGPA_TAG_SEMIJOIN_UNIQUE:
+                       name = "SEMIJOIN_UNIQUE";
+                       break;
+               case PGPA_TAG_SEQ_SCAN:
+                       name = "SEQ_SCAN";
+                       break;
+               case PGPA_TAG_TID_SCAN:
+                       name = "TID_SCAN";
+                       break;
+       }
+
+       /* No case matched: the caller passed something that isn't a tag. */
+       if (name == NULL)
+               pg_unreachable();
+
+       return name;
+}
+
+/*
+ * Convert an advice tag, formatted as a string that has already been
+ * downcased as appropriate, to a pgpa_advice_tag_type.
+ *
+ * On success, *fail is set to false and the matching tag is returned. If
+ * the string is not a recognized tag, *fail is set to true and the return
+ * value is arbitrary; callers must not rely on it.
+ */
+pgpa_advice_tag_type
+pgpa_parse_advice_tag(const char *tag, bool *fail)
+{
+       static const struct
+       {
+               const char *name;
+               pgpa_advice_tag_type type;
+       }                       known_tags[] =
+       {
+               {"bitmap_heap_scan", PGPA_TAG_BITMAP_HEAP_SCAN},
+               {"foreign_join", PGPA_TAG_FOREIGN_JOIN},
+               {"gather", PGPA_TAG_GATHER},
+               {"gather_merge", PGPA_TAG_GATHER_MERGE},
+               {"hash_join", PGPA_TAG_HASH_JOIN},
+               {"index_scan", PGPA_TAG_INDEX_SCAN},
+               {"index_only_scan", PGPA_TAG_INDEX_ONLY_SCAN},
+               {"join_order", PGPA_TAG_JOIN_ORDER},
+               {"merge_join_materialize", PGPA_TAG_MERGE_JOIN_MATERIALIZE},
+               {"merge_join_plain", PGPA_TAG_MERGE_JOIN_PLAIN},
+               {"nested_loop_materialize", PGPA_TAG_NESTED_LOOP_MATERIALIZE},
+               {"nested_loop_memoize", PGPA_TAG_NESTED_LOOP_MEMOIZE},
+               {"nested_loop_plain", PGPA_TAG_NESTED_LOOP_PLAIN},
+               {"no_gather", PGPA_TAG_NO_GATHER},
+               {"partitionwise", PGPA_TAG_PARTITIONWISE},
+               {"semijoin_non_unique", PGPA_TAG_SEMIJOIN_NON_UNIQUE},
+               {"semijoin_unique", PGPA_TAG_SEMIJOIN_UNIQUE},
+               {"seq_scan", PGPA_TAG_SEQ_SCAN},
+               {"tid_scan", PGPA_TAG_TID_SCAN},
+       };
+
+       for (int i = 0; i < lengthof(known_tags); ++i)
+       {
+               /* Cheap first-character test before the full comparison. */
+               if (known_tags[i].name[0] != tag[0])
+                       continue;
+
+               if (strcmp(tag, known_tags[i].name) == 0)
+               {
+                       *fail = false;
+                       return known_tags[i].type;
+               }
+       }
+
+       /* didn't work out */
+       *fail = true;
+
+       /* return an arbitrary value to unwind the call stack */
+       return PGPA_TAG_SEQ_SCAN;
+}
+
+/*
+ * Append the textual form of a pgpa_advice_target to a StringInfo.
+ *
+ * A plain identifier is rendered using pgpa_identifier_string. Any other
+ * kind of target is rendered as its children, separated by single spaces
+ * and enclosed in braces for an unordered list or in parentheses otherwise.
+ */
+void
+pgpa_format_advice_target(StringInfo str, pgpa_advice_target *target)
+{
+       char            open_delim;
+       char            close_delim;
+       ListCell   *lc;
+
+       /* A bare identifier is the base case of the recursion. */
+       if (target->ttype == PGPA_TARGET_IDENTIFIER)
+       {
+               appendStringInfoString(str, pgpa_identifier_string(&target->rid));
+               return;
+       }
+
+       /* Otherwise it's a list of some kind; pick the matching delimiters. */
+       if (target->ttype == PGPA_TARGET_UNORDERED_LIST)
+       {
+               open_delim = '{';
+               close_delim = '}';
+       }
+       else
+       {
+               open_delim = '(';
+               close_delim = ')';
+       }
+
+       appendStringInfoChar(str, open_delim);
+       foreach(lc, target->children)
+       {
+               pgpa_advice_target *child = lfirst(lc);
+
+               /* Every child except the first is preceded by a space. */
+               if (foreach_current_index(lc) > 0)
+                       appendStringInfoChar(str, ' ');
+               pgpa_format_advice_target(str, child);
+       }
+       appendStringInfoChar(str, close_delim);
+}
+
+/*
+ * Append the textual form of a pgpa_index_target to a StringInfo.
+ *
+ * A named index is rendered as its name, quoted if necessary and qualified
+ * with its namespace when one was specified. An AND or OR combination is
+ * rendered as "&&(" or "||(" respectively, followed by the space-separated
+ * children and a closing parenthesis.
+ */
+void
+pgpa_format_index_target(StringInfo str, pgpa_index_target *itarget)
+{
+       ListCell   *lc;
+
+       /* A single index name is the base case of the recursion. */
+       if (itarget->itype == PGPA_INDEX_NAME)
+       {
+               if (itarget->indnamespace != NULL)
+                       appendStringInfo(str, "%s.",
+                                                        quote_identifier(itarget->indnamespace));
+               appendStringInfoString(str, quote_identifier(itarget->indname));
+               return;
+       }
+
+       /* Anything that isn't a name or an AND is treated as an OR. */
+       appendStringInfoString(str,
+                                                  itarget->itype == PGPA_INDEX_AND ? "&&(" : "||(");
+
+       foreach(lc, itarget->children)
+       {
+               pgpa_index_target *child = lfirst(lc);
+
+               /* Every child except the first is preceded by a space. */
+               if (foreach_current_index(lc) > 0)
+                       appendStringInfoChar(str, ' ');
+               pgpa_format_index_target(str, child);
+       }
+
+       appendStringInfoChar(str, ')');
+}
+
+/*
+ * Test two pgpa_index_target objects for exact structural equality.
+ *
+ * Targets of different types are never equal. Named indexes are equal if
+ * their names match and their namespaces are either both absent or both
+ * present and identical. Compound targets are equal if they have the same
+ * number of children and corresponding children are equal, in order.
+ */
+bool
+pgpa_index_targets_equal(pgpa_index_target *i1, pgpa_index_target *i2)
+{
+       ListCell   *lc1;
+       ListCell   *lc2;
+
+       if (i1->itype != i2->itype)
+               return false;
+
+       if (i1->itype == PGPA_INDEX_NAME)
+       {
+               if (i1->indnamespace == NULL || i2->indnamespace == NULL)
+               {
+                       /* With a NULL involved, they match only if both are NULL. */
+                       if (i1->indnamespace != i2->indnamespace)
+                               return false;
+               }
+               else if (strcmp(i1->indnamespace, i2->indnamespace) != 0)
+                       return false;
+
+               return strcmp(i1->indname, i2->indname) == 0;
+       }
+
+       /* Compound target: compare the child lists pairwise. */
+       if (list_length(i1->children) != list_length(i2->children))
+               return false;
+
+       forboth(lc1, i1->children, lc2, i2->children)
+       {
+               if (!pgpa_index_targets_equal(lfirst(lc1), lfirst(lc2)))
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Check whether an identifier matches any part of an advice target.
+ *
+ * For a list target, the result is true if the identifier matches any
+ * descendant of the list. For an identifier target, the alias name and the
+ * occurrence number must be equal, the partition schema must be equal unless
+ * the target leaves it unspecified, and the partition name and plan name
+ * must each either be equal or be NULL on both sides.
+ */
+bool
+pgpa_identifier_matches_target(pgpa_identifier *rid, pgpa_advice_target *target)
+{
+       /* For non-identifiers, check all descendants. */
+       if (target->ttype != PGPA_TARGET_IDENTIFIER)
+       {
+               foreach_ptr(pgpa_advice_target, child_target, target->children)
+               {
+                       if (pgpa_identifier_matches_target(rid, child_target))
+                               return true;
+               }
+               return false;
+       }
+
+       if (strcmp(rid->alias_name, target->rid.alias_name) != 0)
+               return false;
+       if (rid->occurrence != target->rid.occurrence)
+               return false;
+
+       /*
+        * The target may leave the schema NULL to match anything. If the target
+        * does name a schema, an identifier that carries no schema cannot match
+        * it; test for that explicitly so that we never hand a NULL pointer to
+        * strcmp().
+        */
+       if (target->rid.partnsp != NULL &&
+               (rid->partnsp == NULL ||
+                strcmp(rid->partnsp, target->rid.partnsp) != 0))
+               return false;
+
+       /*
+        * These fields can be NULL on either side, but NULL only matches another
+        * NULL.
+        */
+       if (!strings_equal_or_both_null(rid->partrel, target->rid.partrel))
+               return false;
+       if (!strings_equal_or_both_null(rid->plan_name, target->rid.plan_name))
+               return false;
+
+       return true;
+}
+
+/*
+ * Match identifiers to advice targets and return an enum value indicating
+ * the relationship between the set of keys and the set of targets.
+ *
+ * 'rids' is an array of 'nrids' identifiers (the "keys"); 'target' is the
+ * advice target, possibly a list, to compare them against. An identifier
+ * counts as used if it matches at least one identifier target found within
+ * 'target'.
+ *
+ * See the comments for pgpa_itm_type.
+ */
+pgpa_itm_type
+pgpa_identifiers_match_target(int nrids, pgpa_identifier *rids,
+                                                         pgpa_advice_target *target)
+{
+       bool            all_rids_used = true;
+       bool            any_rids_used = false;
+       bool            all_targets_used;
+       bool       *rids_used = palloc0_array(bool, nrids);
+
+       all_targets_used =
+               pgpa_identifiers_cover_target(nrids, rids, target, rids_used);
+
+       for (int i = 0; i < nrids; ++i)
+       {
+               if (rids_used[i])
+                       any_rids_used = true;
+               else
+                       all_rids_used = false;
+       }
+
+       /* The scratch array has been summarized; don't leave it behind. */
+       pfree(rids_used);
+
+       if (all_rids_used)
+       {
+               if (all_targets_used)
+                       return PGPA_ITM_EQUAL;
+               else
+                       return PGPA_ITM_KEYS_ARE_SUBSET;
+       }
+       else
+       {
+               if (all_targets_used)
+                       return PGPA_ITM_TARGETS_ARE_SUBSET;
+               else if (any_rids_used)
+                       return PGPA_ITM_INTERSECTING;
+               else
+                       return PGPA_ITM_DISJOINT;
+       }
+}
+
+/*
+ * Returns true if every target or sub-target is matched by at least one
+ * identifier, and otherwise false. A list target with no children is
+ * reported as covered.
+ *
+ * Also sets rids_used[i] = true for each identifier that matches at least one
+ * target. Entries for identifiers that match nothing are left untouched.
+ */
+static bool
+pgpa_identifiers_cover_target(int nrids, pgpa_identifier *rids,
+                                                         pgpa_advice_target *target, bool *rids_used)
+{
+       bool            covered;
+
+       if (target->ttype == PGPA_TARGET_IDENTIFIER)
+       {
+               /*
+                * Test every identifier, even after finding a match, because each
+                * matching identifier must be flagged in rids_used.
+                */
+               covered = false;
+               for (int k = 0; k < nrids; k++)
+               {
+                       if (!pgpa_identifier_matches_target(&rids[k], target))
+                               continue;
+                       rids_used[k] = true;
+                       covered = true;
+               }
+               return covered;
+       }
+
+       /*
+        * List target: covered only if every child is covered. Visit all of the
+        * children even once one has failed, so that rids_used is filled in
+        * completely.
+        */
+       covered = true;
+       foreach_ptr(pgpa_advice_target, child_target, target->children)
+       {
+               bool            child_covered;
+
+               child_covered = pgpa_identifiers_cover_target(nrids, rids,
+                                                                                                         child_target,
+                                                                                                         rids_used);
+               if (!child_covered)
+                       covered = false;
+       }
+
+       return covered;
+}
diff --git a/contrib/pg_plan_advice/pgpa_ast.h b/contrib/pg_plan_advice/pgpa_ast.h
new file mode 100644 (file)
index 0000000..f6fe730
--- /dev/null
@@ -0,0 +1,204 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_ast.h
+ *       abstract syntax trees for plan advice, plus parser/scanner support
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_ast.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_AST_H
+#define PGPA_AST_H
+
+#include "pgpa_identifier.h"
+
+#include "nodes/pg_list.h"
+
+/*
+ * Advice items generally take the form SOME_TAG(item [...]), where an item
+ * can take various forms. The simplest case is a relation identifier, but
+ * some tags allow sublists, and JOIN_ORDER() allows both ordered and unordered
+ * sublists.
+ *
+ * This enum records which of those forms a pgpa_advice_target takes. For the
+ * two list forms, the members are kept in the target's "children" list.
+ */
+typedef enum
+{
+       PGPA_TARGET_IDENTIFIER,         /* relation identifier */
+       PGPA_TARGET_ORDERED_LIST,       /* (item ...) */
+       PGPA_TARGET_UNORDERED_LIST      /* {item ...} */
+} pgpa_target_type;
+
+/*
+ * When an advice item describes a bitmap index scan, it may need to describe
+ * the use of multiple indexes.
+ *
+ * A pgpa_index_target is therefore either a single index name or an AND/OR
+ * combination whose operands are stored in its "children" list.
+ */
+typedef enum
+{
+       PGPA_INDEX_NAME,                        /* index schema + name */
+       PGPA_INDEX_AND,                         /* &&(item ...) */
+       PGPA_INDEX_OR                           /* ||(item ...) */
+} pgpa_index_type;
+
+/*
+ * An index specification. We use this for INDEX_SCAN, INDEX_ONLY_SCAN,
+ * and BITMAP_HEAP_SCAN advice, but in the former two cases, the target must
+ * be of type PGPA_INDEX_NAME.
+ */
+typedef struct pgpa_index_target
+{
+       pgpa_index_type itype;
+
+       /*
+        * Index schema and name, when itype == PGPA_INDEX_NAME. The schema may
+        * be NULL, meaning that no schema was specified.
+        */
+       char       *indnamespace;
+       char       *indname;
+
+       /* List of pgpa_index_target objects, when itype != PGPA_INDEX_NAME */
+       List       *children;
+} pgpa_index_target;
+
+/*
+ * A single item about which advice is being given, which could be either
+ * a relation identifier that we want to break out into its constituent fields,
+ * or a sublist of some kind.
+ */
+typedef struct pgpa_advice_target
+{
+       pgpa_target_type ttype;
+
+       /*
+        * This field is meaningful when ttype is PGPA_TARGET_IDENTIFIER.
+        *
+        * All identifiers must have an alias name and an occurrence number; the
+        * remaining fields can be NULL. Note that it's possible to specify a
+        * partition name without a partition schema, but not the reverse.
+        */
+       pgpa_identifier rid;
+
+       /*
+        * This field is set when ttype is PGPA_TARGET_IDENTIFIER and the advice
+        * tag is PGPA_TAG_INDEX_SCAN, PGPA_TAG_INDEX_ONLY_SCAN, or
+        * PGPA_TAG_BITMAP_HEAP_SCAN.
+        */
+       pgpa_index_target *itarget;
+
+       /*
+        * When the ttype is PGPA_TARGET_<anything>_LIST, this field contains a
+        * list of additional pgpa_advice_target objects. Otherwise, it is unused.
+        */
+       List       *children;
+} pgpa_advice_target;
+
+/*
+ * These are all the kinds of advice that we know how to parse. If a keyword
+ * is found at the top level, it must be in this list.
+ *
+ * The values are listed in alphabetical order.
+ *
+ * If you change anything here, also update pgpa_parse_advice_tag and
+ * pgpa_cstring_advice_tag.
+ */
+typedef enum pgpa_advice_tag_type
+{
+       PGPA_TAG_BITMAP_HEAP_SCAN,
+       PGPA_TAG_FOREIGN_JOIN,
+       PGPA_TAG_GATHER,
+       PGPA_TAG_GATHER_MERGE,
+       PGPA_TAG_HASH_JOIN,
+       PGPA_TAG_INDEX_ONLY_SCAN,
+       PGPA_TAG_INDEX_SCAN,
+       PGPA_TAG_JOIN_ORDER,
+       PGPA_TAG_MERGE_JOIN_MATERIALIZE,
+       PGPA_TAG_MERGE_JOIN_PLAIN,
+       PGPA_TAG_NESTED_LOOP_MATERIALIZE,
+       PGPA_TAG_NESTED_LOOP_MEMOIZE,
+       PGPA_TAG_NESTED_LOOP_PLAIN,
+       PGPA_TAG_NO_GATHER,
+       PGPA_TAG_PARTITIONWISE,
+       PGPA_TAG_SEMIJOIN_NON_UNIQUE,
+       PGPA_TAG_SEMIJOIN_UNIQUE,
+       PGPA_TAG_SEQ_SCAN,
+       PGPA_TAG_TID_SCAN
+} pgpa_advice_tag_type;
+
+/*
+ * An item of advice, meaning a tag and the list of all targets to which
+ * it is being applied.
+ *
+ * "targets" is a list of pgpa_advice_target objects.
+ *
+ * The List returned from pgpa_yyparse is a list of pgpa_advice_item objects.
+ */
+typedef struct pgpa_advice_item
+{
+       pgpa_advice_tag_type tag;
+       List       *targets;
+} pgpa_advice_item;
+
+/*
+ * Result of comparing an array of pgpa_identifier objects (the "keys") to a
+ * pgpa_advice_target.
+ *
+ * PGPA_ITM_EQUAL means all targets are matched by some identifier, and
+ * all identifiers were matched to a target.
+ *
+ * PGPA_ITM_KEYS_ARE_SUBSET means that all identifiers matched to a target,
+ * but there were leftover targets. Generally, this means that the advice is
+ * looking to apply to all of the rels we have plus some additional ones that
+ * we don't have.
+ *
+ * PGPA_ITM_TARGETS_ARE_SUBSET means that all targets are matched by an
+ * identifier, but there were leftover identifiers. Generally, this means
+ * that the advice is looking to apply to some but not all of the rels we have.
+ *
+ * PGPA_ITM_INTERSECTING means that some identifiers and targets were matched,
+ * but neither all identifiers nor all targets could be matched to items in
+ * the other set.
+ *
+ * PGPA_ITM_DISJOINT means that no matches between identifiers and targets were
+ * found.
+ */
+typedef enum
+{
+       PGPA_ITM_EQUAL,
+       PGPA_ITM_KEYS_ARE_SUBSET,
+       PGPA_ITM_TARGETS_ARE_SUBSET,
+       PGPA_ITM_INTERSECTING,
+       PGPA_ITM_DISJOINT
+} pgpa_itm_type;
+
+/* for pgpa_scanner.l and pgpa_parser.y */
+union YYSTYPE;
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void *yyscan_t;
+#endif
+
+/* in pgpa_scanner.l */
+extern int     pgpa_yylex(union YYSTYPE *yylval_param, List **result,
+                                          char **parse_error_msg_p, yyscan_t yyscanner);
+extern void pgpa_yyerror(List **result, char **parse_error_msg_p,
+                                                yyscan_t yyscanner,
+                                                const char *message);
+extern void pgpa_scanner_init(const char *str, yyscan_t *yyscannerp);
+extern void pgpa_scanner_finish(yyscan_t yyscanner);
+
+/* in pgpa_parser.y */
+extern int     pgpa_yyparse(List **result, char **parse_error_msg_p,
+                                                yyscan_t yyscanner);
+extern List *pgpa_parse(const char *advice_string, char **error_p);
+
+/* in pgpa_ast.c */
+extern char *pgpa_cstring_advice_tag(pgpa_advice_tag_type advice_tag);
+extern bool pgpa_identifier_matches_target(pgpa_identifier *rid,
+                                                                                  pgpa_advice_target *target);
+extern pgpa_itm_type pgpa_identifiers_match_target(int nrids,
+                                                                                                  pgpa_identifier *rids,
+                                                                                                  pgpa_advice_target *target);
+extern bool pgpa_index_targets_equal(pgpa_index_target *i1,
+                                                                        pgpa_index_target *i2);
+extern pgpa_advice_tag_type pgpa_parse_advice_tag(const char *tag, bool *fail);
+extern void pgpa_format_advice_target(StringInfo str,
+                                                                         pgpa_advice_target *target);
+extern void pgpa_format_index_target(StringInfo str,
+                                                                        pgpa_index_target *itarget);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_collector.c b/contrib/pg_plan_advice/pgpa_collector.c
new file mode 100644 (file)
index 0000000..12085d9
--- /dev/null
@@ -0,0 +1,637 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_collector.c
+ *       collect advice into backend-local or shared memory
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_collector.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pg_plan_advice.h"
+#include "pgpa_collector.h"
+
+#include "datatype/timestamp.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "nodes/pg_list.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/timestamp.h"
+
+PG_FUNCTION_INFO_V1(pg_clear_collected_local_advice);
+PG_FUNCTION_INFO_V1(pg_clear_collected_shared_advice);
+PG_FUNCTION_INFO_V1(pg_get_collected_local_advice);
+PG_FUNCTION_INFO_V1(pg_get_collected_shared_advice);
+
+#define ADVICE_CHUNK_SIZE              1024
+#define ADVICE_CHUNK_ARRAY_SIZE        64
+
+#define        PG_GET_ADVICE_COLUMNS   7
+
+/*
+ * Advice extracted from one query plan, together with the query string
+ * and various other identifying details.
+ *
+ * textual_data holds two NUL-terminated strings back to back: first the
+ * query string, then the advice string. advice_offset is the byte offset of
+ * the advice string within textual_data, which equals the length of the
+ * query string including its terminator.
+ */
+typedef struct pgpa_collected_advice
+{
+       Oid                     userid;                 /* user OID */
+       Oid                     dbid;                   /* database OID */
+       uint64          queryid;                /* query identifier */
+       TimestampTz timestamp;          /* query timestamp */
+       int                     advice_offset;  /* start of advice in textual data */
+       char            textual_data[FLEXIBLE_ARRAY_MEMBER];
+} pgpa_collected_advice;
+
+/*
+ * A bunch of pointers to pgpa_collected_advice objects, stored in
+ * backend-local memory.
+ *
+ * A NULL entry is a slot that is not currently in use.
+ */
+typedef struct pgpa_local_advice_chunk
+{
+       pgpa_collected_advice *entries[ADVICE_CHUNK_SIZE];
+} pgpa_local_advice_chunk;
+
+/*
+ * Information about all of the pgpa_collected_advice objects that we're
+ * storing in local memory.
+ *
+ * We assign consecutive IDs, starting from 0, to each pgpa_collected_advice
+ * object that we store. The actual storage is an array of chunks, which
+ * helps keep memcpy() overhead low when we start discarding older data.
+ *
+ * The object with a given ID lives in chunk (ID - base_id) / ADVICE_CHUNK_SIZE
+ * at offset (ID - base_id) % ADVICE_CHUNK_SIZE.
+ */
+typedef struct pgpa_local_advice
+{
+       /* ID that will be assigned to the next object stored */
+       uint64          next_id;
+       /* ID of the oldest object not yet discarded; equals next_id if none */
+       uint64          oldest_id;
+       /* ID that corresponds to entry 0 of chunks[0] */
+       uint64          base_id;
+       /* number of elements allocated for the chunks array */
+       int                     chunk_array_allocated_size;
+       /* array of chunk pointers; NULL for chunks not yet allocated */
+       pgpa_local_advice_chunk **chunks;
+} pgpa_local_advice;
+
+/*
+ * Just like pgpa_local_advice_chunk, but stored in a dynamic shared area,
+ * so we must use dsa_pointer instead of native pointers.
+ *
+ * An entry equal to InvalidDsaPointer is a slot that is not currently in use.
+ */
+typedef struct pgpa_shared_advice_chunk
+{
+       dsa_pointer entries[ADVICE_CHUNK_SIZE];
+} pgpa_shared_advice_chunk;
+
+/*
+ * Just like pgpa_local_advice, but stored in a dynamic shared area, so
+ * we must use dsa_pointer instead of native pointers.
+ *
+ * "chunks" refers to an array of dsa_pointer values, each of which refers to
+ * a pgpa_shared_advice_chunk. chunk_array_allocated_size is zero until that
+ * array has been allocated.
+ */
+typedef struct pgpa_shared_advice
+{
+       uint64          next_id;
+       uint64          oldest_id;
+       uint64          base_id;
+       int                     chunk_array_allocated_size;
+       dsa_pointer chunks;
+} pgpa_shared_advice;
+
+/* Pointers to local and shared collectors */
+static pgpa_local_advice *local_collector = NULL;
+static pgpa_shared_advice *shared_collector = NULL;
+
+/* Static functions */
+static pgpa_collected_advice *pgpa_make_collected_advice(Oid userid,
+                                                                                                                Oid dbid,
+                                                                                                                uint64 queryId,
+                                                                                                                TimestampTz timestamp,
+                                                                                                                const char *query_string,
+                                                                                                                const char *advice_string,
+                                                                                                                dsa_area *area,
+                                                                                                                dsa_pointer *result);
+static void pgpa_store_local_advice(pgpa_collected_advice *ca);
+static void pgpa_trim_local_advice(int limit);
+static void pgpa_store_shared_advice(dsa_pointer ca_pointer);
+static void pgpa_trim_shared_advice(dsa_area *area, int limit);
+
+/*
+ * Return the query string stored in a pgpa_collected_advice, which is the
+ * text at the very start of its textual data.
+ */
+static inline const char *
+query_string(pgpa_collected_advice *ca)
+{
+       return &ca->textual_data[0];
+}
+
+/*
+ * Return the advice string stored in a pgpa_collected_advice, which begins
+ * advice_offset bytes into its textual data.
+ */
+static inline const char *
+advice_string(pgpa_collected_advice *ca)
+{
+       return &ca->textual_data[ca->advice_offset];
+}
+
+/*
+ * Record the advice generated for one query in whichever advice collectors
+ * are enabled: the backend-local one, the shared one, both, or neither.
+ *
+ * A collector is enabled when the corresponding collection limit is greater
+ * than zero. Each enabled collector receives its own copy of the data, all
+ * stamped with the same user, database, and timestamp.
+ */
+void
+pgpa_collect_advice(uint64 queryId, const char *query_string,
+                                       const char *advice_string)
+{
+       Oid                     userid;
+       Oid                     dbid;
+       TimestampTz now;
+
+       userid = GetUserId();
+       dbid = MyDatabaseId;
+       now = GetCurrentTimestamp();
+
+       /*
+        * Backend-local collection: build and store the entry while running in
+        * the memory context returned by pg_plan_advice_get_mcxt().
+        */
+       if (pg_plan_advice_local_collection_limit > 0)
+       {
+               MemoryContext caller_context;
+               pgpa_collected_advice *local_ca;
+
+               caller_context = MemoryContextSwitchTo(pg_plan_advice_get_mcxt());
+               local_ca = pgpa_make_collected_advice(userid, dbid, queryId, now,
+                                                                                         query_string, advice_string,
+                                                                                         NULL, NULL);
+               pgpa_store_local_advice(local_ca);
+               MemoryContextSwitchTo(caller_context);
+       }
+
+       /*
+        * Shared collection: build the entry in the DSA area; only the
+        * dsa_pointer to it is needed to store it.
+        */
+       if (pg_plan_advice_shared_collection_limit > 0)
+       {
+               dsa_area   *shared_area = pg_plan_advice_dsa_area();
+               dsa_pointer shared_ca;
+
+               (void) pgpa_make_collected_advice(userid, dbid, queryId, now,
+                                                                                 query_string, advice_string,
+                                                                                 shared_area, &shared_ca);
+               pgpa_store_shared_advice(shared_ca);
+       }
+}
+
+/*
+ * Allocate and fill a new pgpa_collected_advice object.
+ *
+ * The object records the supplied user OID, database OID, query ID, and
+ * timestamp, followed by copies of the query string and the advice string,
+ * each including its terminating NUL byte.
+ *
+ * If area != NULL, it is used to allocate the new object, and the resulting
+ * dsa_pointer is returned via *result.
+ *
+ * If area == NULL, the new object is allocated in the current memory context,
+ * and result is not examined or modified.
+ *
+ * In both cases the return value is a native pointer to the new object.
+ */
+static pgpa_collected_advice *
+pgpa_make_collected_advice(Oid userid, Oid dbid, uint64 queryId,
+                                                  TimestampTz timestamp,
+                                                  const char *query_string,
+                                                  const char *advice_string,
+                                                  dsa_area *area, dsa_pointer *result)
+{
+       size_t          query_string_length = strlen(query_string) + 1;
+       size_t          advice_string_length = strlen(advice_string) + 1;
+       size_t          total_length;
+       pgpa_collected_advice *ca;
+
+       /* Fixed-size header plus both strings, terminators included. */
+       total_length = offsetof(pgpa_collected_advice, textual_data)
+               + query_string_length + advice_string_length;
+
+       if (area == NULL)
+               ca = palloc(total_length);
+       else
+       {
+               *result = dsa_allocate(area, total_length);
+               ca = dsa_get_address(area, *result);
+       }
+
+       /*
+        * Record the identity the caller passed in, rather than looking it up
+        * again here, so that the parameters are actually honored.
+        */
+       ca->userid = userid;
+       ca->dbid = dbid;
+       ca->queryid = queryId;
+       ca->timestamp = timestamp;
+
+       /* The advice string starts right after the query string's terminator. */
+       ca->advice_offset = query_string_length;
+
+       memcpy(ca->textual_data, query_string, query_string_length);
+       memcpy(&ca->textual_data[ca->advice_offset],
+                  advice_string, advice_string_length);
+
+       return ca;
+}
+
+/*
+ * Add a pgpa_collected_advice object to our backend-local advice collection.
+ *
+ * Caller is responsible for switching to the appropriate memory context;
+ * the provided object should have been allocated in that same context.
+ *
+ * The collector itself, its chunk array, and individual chunks are created
+ * here on demand, in the current memory context. After the new object has
+ * been stored, older objects are discarded as needed to stay within
+ * pg_plan_advice_local_collection_limit.
+ */
+static void
+pgpa_store_local_advice(pgpa_collected_advice *ca)
+{
+       uint64          chunk_number;
+       uint64          chunk_offset;
+       pgpa_local_advice *la = local_collector;
+
+       /* If the local advice collector isn't initialized yet, do that now. */
+       if (la == NULL)
+       {
+               la = palloc0(sizeof(pgpa_local_advice));
+               la->chunk_array_allocated_size = ADVICE_CHUNK_ARRAY_SIZE;
+               la->chunks = palloc0_array(pgpa_local_advice_chunk *,
+                                                                  la->chunk_array_allocated_size);
+               local_collector = la;
+       }
+
+       /*
+        * Compute chunk and offset at which to store this advice. Positions are
+        * counted from base_id, the ID that corresponds to the first entry of the
+        * first chunk.
+        */
+       chunk_number = (la->next_id - la->base_id) / ADVICE_CHUNK_SIZE;
+       chunk_offset = (la->next_id - la->base_id) % ADVICE_CHUNK_SIZE;
+
+       /* Extend chunk array, if needed. */
+       if (chunk_number >= la->chunk_array_allocated_size)
+       {
+               int                     new_size;
+
+               new_size = la->chunk_array_allocated_size + ADVICE_CHUNK_ARRAY_SIZE;
+               la->chunks = repalloc0_array(la->chunks,
+                                                                        pgpa_local_advice_chunk *,
+                                                                        la->chunk_array_allocated_size,
+                                                                        new_size);
+               la->chunk_array_allocated_size = new_size;
+       }
+
+       /* Allocate new chunk, if needed. */
+       if (la->chunks[chunk_number] == NULL)
+               la->chunks[chunk_number] = palloc0_object(pgpa_local_advice_chunk);
+
+       /* Save pointer and bump next-id counter. */
+       Assert(la->chunks[chunk_number]->entries[chunk_offset] == NULL);
+       la->chunks[chunk_number]->entries[chunk_offset] = ca;
+       ++la->next_id;
+
+       /* If we've exceeded the storage limit, discard old data. */
+       pgpa_trim_local_advice(pg_plan_advice_local_collection_limit);
+}
+
+/*
+ * Add a pgpa_collected_advice object to the shared advice collection.
+ *
+ * 'ca_pointer' should have been allocated from the pg_plan_advice DSA area
+ * and should point to an object of type pgpa_collected_advice.
+ *
+ * The shared state's lock is held in exclusive mode for the whole operation,
+ * including the trimming of old entries at the end.
+ */
+static void
+pgpa_store_shared_advice(dsa_pointer ca_pointer)
+{
+       uint64          chunk_number;
+       uint64          chunk_offset;
+       pgpa_shared_state *state = pg_plan_advice_attach();
+       dsa_area   *area = pg_plan_advice_dsa_area();
+       pgpa_shared_advice *sa = shared_collector;
+       dsa_pointer *chunk_array;
+       pgpa_shared_advice_chunk *chunk;
+
+       /* Lock the shared state. */
+       LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+
+       /*
+        * If we're not attached to the shared advice collector yet, fix that now.
+        * If we're the first ones to attach, we may need to create the object.
+        */
+       if (sa == NULL)
+       {
+               if (state->shared_collector == InvalidDsaPointer)
+                       state->shared_collector =
+                               dsa_allocate0(area, sizeof(pgpa_shared_advice));
+               shared_collector = sa = dsa_get_address(area, state->shared_collector);
+       }
+
+       /*
+        * It's possible that some other backend may have succeeded in creating
+        * the main collector object but failed to allocate an initial chunk
+        * array, so we must be prepared to allocate the chunk array here whether
+        * or not we created the collector object.
+        */
+       if (shared_collector->chunk_array_allocated_size == 0)
+       {
+               sa->chunks =
+                       dsa_allocate0(area,
+                                                 sizeof(dsa_pointer) * ADVICE_CHUNK_ARRAY_SIZE);
+               sa->chunk_array_allocated_size = ADVICE_CHUNK_ARRAY_SIZE;
+       }
+
+       /* Compute chunk and offset at which to store this advice. */
+       chunk_number = (sa->next_id - sa->base_id) / ADVICE_CHUNK_SIZE;
+       chunk_offset = (sa->next_id - sa->base_id) % ADVICE_CHUNK_SIZE;
+
+       /* Get the address of the chunk array and, if needed, extend it. */
+       if (chunk_number >= sa->chunk_array_allocated_size)
+       {
+               int                     new_size;
+               dsa_pointer new_chunks;
+
+               /*
+                * DSA can't enlarge an existing allocation, so we must make a new
+                * allocation and copy data over.
+                *
+                * The new array is zero-filled, so the slots beyond the copied
+                * portion start out as unallocated chunks. From here on,
+                * chunk_array refers to the new array.
+                */
+               new_size = sa->chunk_array_allocated_size + ADVICE_CHUNK_ARRAY_SIZE;
+               new_chunks = dsa_allocate0(area, sizeof(dsa_pointer) * new_size);
+               chunk_array = dsa_get_address(area, new_chunks);
+               memcpy(chunk_array, dsa_get_address(area, sa->chunks),
+                          sizeof(dsa_pointer) * sa->chunk_array_allocated_size);
+               dsa_free(area, sa->chunks);
+               sa->chunks = new_chunks;
+               sa->chunk_array_allocated_size = new_size;
+       }
+       else
+               chunk_array = dsa_get_address(area, sa->chunks);
+
+       /* Get the address of the desired chunk, allocating it if needed. */
+       if (chunk_array[chunk_number] == InvalidDsaPointer)
+               chunk_array[chunk_number] =
+                       dsa_allocate0(area, sizeof(pgpa_shared_advice_chunk));
+       chunk = dsa_get_address(area, chunk_array[chunk_number]);
+
+       /* Save pointer and bump next-id counter. */
+       Assert(chunk->entries[chunk_offset] == InvalidDsaPointer);
+       chunk->entries[chunk_offset] = ca_pointer;
+       ++sa->next_id;
+
+       /* If we've exceeded the storage limit, discard old data. */
+       pgpa_trim_shared_advice(area, pg_plan_advice_shared_collection_limit);
+
+       /* Release lock on shared state. */
+       LWLockRelease(&state->lock);
+}
+
+/*
+ * Discard collected advice stored in backend-local memory in excess of the
+ * specified limit.
+ *
+ * The oldest entries are discarded first. A limit of zero discards every
+ * stored entry. local_collector must not be NULL when this is called.
+ */
+static void
+pgpa_trim_local_advice(int limit)
+{
+       pgpa_local_advice *la = local_collector;
+       uint64          current_count;
+       uint64          trim_count;
+       uint64          total_chunk_count;
+       uint64          trim_chunk_count;
+       uint64          remaining_chunk_count;
+
+       /* If we haven't yet reached the limit, there's nothing to do. */
+       current_count = la->next_id - la->oldest_id;
+       if (current_count <= limit)
+               return;
+
+       /* Free enough entries to get us back down to the limit. */
+       trim_count = current_count - limit;
+       while (trim_count > 0)
+       {
+               uint64          chunk_number;
+               uint64          chunk_offset;
+
+               chunk_number = (la->oldest_id - la->base_id) / ADVICE_CHUNK_SIZE;
+               chunk_offset = (la->oldest_id - la->base_id) % ADVICE_CHUNK_SIZE;
+
+               Assert(la->chunks[chunk_number]->entries[chunk_offset] != NULL);
+               pfree(la->chunks[chunk_number]->entries[chunk_offset]);
+               la->chunks[chunk_number]->entries[chunk_offset] = NULL;
+               ++la->oldest_id;
+               --trim_count;
+       }
+
+       /*
+        * Free any chunks that are now entirely unused. These are the chunks
+        * that lie wholly before the oldest remaining entry.
+        */
+       trim_chunk_count = (la->oldest_id - la->base_id) / ADVICE_CHUNK_SIZE;
+       for (uint64 n = 0; n < trim_chunk_count; ++n)
+               pfree(la->chunks[n]);
+
+       /*
+        * Slide remaining chunk pointers back toward the base of the array.
+        * total_chunk_count is the number of chunks needed to hold every ID from
+        * base_id up to, but not including, next_id.
+        */
+       total_chunk_count = (la->next_id - la->base_id +
+                                                ADVICE_CHUNK_SIZE - 1) / ADVICE_CHUNK_SIZE;
+       remaining_chunk_count = total_chunk_count - trim_chunk_count;
+       if (remaining_chunk_count > 0)
+               memmove(&la->chunks[0], &la->chunks[trim_chunk_count],
+                               sizeof(pgpa_local_advice_chunk *) * remaining_chunk_count);
+
+       /*
+        * Don't leave stale pointers around. The slots cleared here are the
+        * trim_chunk_count positions just past the chunks that were kept.
+        */
+       memset(&la->chunks[remaining_chunk_count], 0,
+                  sizeof(pgpa_local_advice_chunk *)
+                  * (total_chunk_count - remaining_chunk_count));
+
+       /* Adjust base ID value accordingly. */
+       la->base_id += trim_chunk_count * ADVICE_CHUNK_SIZE;
+}
+
+/*
+ * Discard collected advice stored in shared memory in excess of the
+ * specified limit.
+ *
+ * The oldest entries are discarded first. A limit of zero discards every
+ * stored entry. shared_collector must already point to the shared collector
+ * object; both callers in this file hold the shared state's lock in
+ * exclusive mode while calling this function.
+ */
+static void
+pgpa_trim_shared_advice(dsa_area *area, int limit)
+{
+       pgpa_shared_advice *sa = shared_collector;
+       uint64          current_count;
+       uint64          trim_count;
+       uint64          total_chunk_count;
+       uint64          trim_chunk_count;
+       uint64          remaining_chunk_count;
+       dsa_pointer *chunk_array;
+
+       /* If we haven't yet reached the limit, there's nothing to do. */
+       current_count = sa->next_id - sa->oldest_id;
+       if (current_count <= limit)
+               return;
+
+       /* Get a pointer to the chunk array. */
+       chunk_array = dsa_get_address(area, sa->chunks);
+
+       /* Free enough entries to get us back down to the limit. */
+       trim_count = current_count - limit;
+       while (trim_count > 0)
+       {
+               uint64          chunk_number;
+               uint64          chunk_offset;
+               pgpa_shared_advice_chunk *chunk;
+
+               chunk_number = (sa->oldest_id - sa->base_id) / ADVICE_CHUNK_SIZE;
+               chunk_offset = (sa->oldest_id - sa->base_id) % ADVICE_CHUNK_SIZE;
+
+               chunk = dsa_get_address(area, chunk_array[chunk_number]);
+               Assert(chunk->entries[chunk_offset] != InvalidDsaPointer);
+               dsa_free(area, chunk->entries[chunk_offset]);
+               chunk->entries[chunk_offset] = InvalidDsaPointer;
+               ++sa->oldest_id;
+               --trim_count;
+       }
+
+       /* Free any chunks that are now entirely unused. */
+       trim_chunk_count = (sa->oldest_id - sa->base_id) / ADVICE_CHUNK_SIZE;
+       for (uint64 n = 0; n < trim_chunk_count; ++n)
+               dsa_free(area, chunk_array[n]);
+
+       /* Slide remaining chunk pointers back toward the base of the array. */
+       total_chunk_count = (sa->next_id - sa->base_id +
+                                                ADVICE_CHUNK_SIZE - 1) / ADVICE_CHUNK_SIZE;
+       remaining_chunk_count = total_chunk_count - trim_chunk_count;
+       if (remaining_chunk_count > 0)
+               memmove(&chunk_array[0], &chunk_array[trim_chunk_count],
+                               sizeof(dsa_pointer) * remaining_chunk_count);
+
+       /*
+        * Don't leave stale pointers around. The array elements are dsa_pointer
+        * values, not native pointers, so the region to clear must be sized in
+        * units of dsa_pointer; the two sizes are not guaranteed to be equal.
+        * Zero-filling is the same convention dsa_allocate0() establishes for
+        * slots that have never been used.
+        */
+       memset(&chunk_array[remaining_chunk_count], 0,
+                  sizeof(dsa_pointer)
+                  * (total_chunk_count - remaining_chunk_count));
+
+       /* Adjust base ID value accordingly. */
+       sa->base_id += trim_chunk_count * ADVICE_CHUNK_SIZE;
+}
+
+/*
+ * SQL-callable function: throw away all advice that this backend has
+ * collected in its own local memory.
+ *
+ * When a local collector exists, pgpa_trim_local_advice() is asked to trim
+ * it to a limit of zero. When none exists, there is nothing to do.
+ */
+Datum
+pg_clear_collected_local_advice(PG_FUNCTION_ARGS)
+{
+       /* No local collector means nothing was ever collected here. */
+       if (local_collector == NULL)
+               PG_RETURN_VOID();
+
+       pgpa_trim_local_advice(0);
+
+       PG_RETURN_VOID();
+}
+
+/*
+ * SQL-callable function to discard advice collected in shared memory
+ *
+ * The shared state lock is taken in exclusive mode for the duration of the
+ * operation and is released on every exit path.
+ */
+Datum
+pg_clear_collected_shared_advice(PG_FUNCTION_ARGS)
+{
+       pgpa_shared_state *state = pg_plan_advice_attach();
+       dsa_area   *area = pg_plan_advice_dsa_area();
+
+       LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+
+       /*
+        * If we're not attached to the shared advice collector yet, fix that now;
+        * but if the collector doesn't even exist, we can return without doing
+        * anything else.
+        */
+       if (shared_collector == NULL)
+       {
+               if (state->shared_collector == InvalidDsaPointer)
+               {
+                       LWLockRelease(&state->lock);
+                       PG_RETURN_VOID();
+               }
+               shared_collector = dsa_get_address(area, state->shared_collector);
+       }
+
+       /* Do the real work: trimming to a limit of zero frees every entry. */
+       pgpa_trim_shared_advice(area, 0);
+
+       LWLockRelease(&state->lock);
+
+       PG_RETURN_VOID();
+}
+
+/*
+ * SQL-callable set-returning function that reports the advice collected in
+ * this backend's local memory.
+ *
+ * Each reported row carries, in order: the entry ID, the user OID, the
+ * database OID, the query ID, the timestamp, the query string, and the
+ * advice string. An entry is reported only if member_can_set_role() accepts
+ * the current user for the user ID recorded in that entry.
+ */
+Datum
+pg_get_collected_local_advice(PG_FUNCTION_ARGS)
+{
+       ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+       pgpa_local_advice *la = local_collector;
+       Oid                     userid = GetUserId();
+       uint64          id;
+
+       InitMaterializedSRF(fcinfo, 0);
+
+       /* Without a local collector, the result set is simply empty. */
+       if (la == NULL)
+               return (Datum) 0;
+
+       /* Walk the entries from the oldest ID up to, but excluding, next_id. */
+       for (id = la->oldest_id; id < la->next_id; ++id)
+       {
+               uint64          chunk_idx = (id - la->base_id) / ADVICE_CHUNK_SIZE;
+               uint64          entry_idx = (id - la->base_id) % ADVICE_CHUNK_SIZE;
+               pgpa_collected_advice *ca;
+               Datum           row[PG_GET_ADVICE_COLUMNS];
+               bool            isnull[PG_GET_ADVICE_COLUMNS] = {0};
+
+               /* IDs are relative to base_id and stored in fixed-size chunks. */
+               ca = la->chunks[chunk_idx]->entries[entry_idx];
+
+               /* Only report entries that pass the role check. */
+               if (member_can_set_role(userid, ca->userid))
+               {
+                       row[0] = UInt64GetDatum(id);
+                       row[1] = ObjectIdGetDatum(ca->userid);
+                       row[2] = ObjectIdGetDatum(ca->dbid);
+                       row[3] = UInt64GetDatum(ca->queryid);
+                       row[4] = TimestampGetDatum(ca->timestamp);
+                       row[5] = CStringGetTextDatum(query_string(ca));
+                       row[6] = CStringGetTextDatum(advice_string(ca));
+
+                       tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+                                                                row, isnull);
+               }
+       }
+
+       return (Datum) 0;
+}
+
+/*
+ * SQL-callable set-returning function that reports the advice collected in
+ * shared memory.
+ *
+ * The shared state lock is held in shared mode while the entries are read.
+ * Each reported row carries, in order: the entry ID, the user OID, the
+ * database OID, the query ID, the timestamp, the query string, and the
+ * advice string.
+ *
+ * NOTE(review): unlike pg_get_collected_local_advice, no per-entry
+ * member_can_set_role() check is applied here; confirm that access to this
+ * function is restricted by some other means.
+ */
+Datum
+pg_get_collected_shared_advice(PG_FUNCTION_ARGS)
+{
+       ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+       pgpa_shared_state *state = pg_plan_advice_attach();
+       dsa_area   *area = pg_plan_advice_dsa_area();
+       pgpa_shared_advice *sa = shared_collector;
+       dsa_pointer *chunk_array;
+       uint64          id;
+
+       InitMaterializedSRF(fcinfo, 0);
+
+       /* Everything below reads shared state, so take the lock first. */
+       LWLockAcquire(&state->lock, LW_SHARED);
+
+       /*
+        * This backend may not have mapped the shared collector yet. If one
+        * exists, map it and remember the address; if none exists, there is
+        * nothing to report.
+        */
+       if (sa == NULL)
+       {
+               if (state->shared_collector != InvalidDsaPointer)
+               {
+                       sa = dsa_get_address(area, state->shared_collector);
+                       shared_collector = sa;
+               }
+               else
+               {
+                       LWLockRelease(&state->lock);
+                       return (Datum) 0;
+               }
+       }
+
+       /* The chunk array is itself stored in the DSA area. */
+       chunk_array = dsa_get_address(area, sa->chunks);
+
+       /* Walk the entries from the oldest ID up to, but excluding, next_id. */
+       for (id = sa->oldest_id; id < sa->next_id; ++id)
+       {
+               uint64          chunk_idx = (id - sa->base_id) / ADVICE_CHUNK_SIZE;
+               uint64          entry_idx = (id - sa->base_id) % ADVICE_CHUNK_SIZE;
+               pgpa_shared_advice_chunk *chunk;
+               pgpa_collected_advice *ca;
+               Datum           row[PG_GET_ADVICE_COLUMNS];
+               bool            isnull[PG_GET_ADVICE_COLUMNS] = {0};
+
+               /* Two hops: chunk array slot -> chunk, then chunk slot -> entry. */
+               chunk = dsa_get_address(area, chunk_array[chunk_idx]);
+               ca = dsa_get_address(area, chunk->entries[entry_idx]);
+
+               row[0] = UInt64GetDatum(id);
+               row[1] = ObjectIdGetDatum(ca->userid);
+               row[2] = ObjectIdGetDatum(ca->dbid);
+               row[3] = UInt64GetDatum(ca->queryid);
+               row[4] = TimestampGetDatum(ca->timestamp);
+               row[5] = CStringGetTextDatum(query_string(ca));
+               row[6] = CStringGetTextDatum(advice_string(ca));
+
+               tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+                                                        row, isnull);
+       }
+
+       /* Done reading shared state. */
+       LWLockRelease(&state->lock);
+
+       return (Datum) 0;
+}
diff --git a/contrib/pg_plan_advice/pgpa_collector.h b/contrib/pg_plan_advice/pgpa_collector.h
new file mode 100644 (file)
index 0000000..b6e746a
--- /dev/null
@@ -0,0 +1,18 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_collector.h
+ *       collect advice into backend-local or shared memory
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_collector.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_COLLECTOR_H
+#define PGPA_COLLECTOR_H
+
+extern void pgpa_collect_advice(uint64 queryId, const char *query_string,
+                                                               const char *advice_string);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_identifier.c b/contrib/pg_plan_advice/pgpa_identifier.c
new file mode 100644 (file)
index 0000000..2fa8075
--- /dev/null
@@ -0,0 +1,476 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_identifier.c
+ *       create appropriate identifiers for range table entries
+ *
+ * The goal of this module is to be able to produce identifiers for range
+ * table entries that are unique, understandable to human beings, and
+ * able to be reconstructed during future planning cycles. As an
+ * exception, we do not care about, or want to produce, identifiers for
+ * RTE_JOIN entries. This is because (1) we would end up with a ton of
+ * RTEs with unhelpful names like unnamed_join_17; (2) not all joins have
+ * RTEs; and (3) we intend to refer to joins by their constituent members
+ * rather than by reference to the join RTE.
+ *
+ * In general, we construct identifiers of the following form:
+ *
+ * alias_name#occurrence_number/child_table_name@subquery_name
+ *
+ * However, occurrence_number is omitted when it is the first occurrence
+ * within the same subquery, child_table_name is omitted for relations that
+ * are not child tables, and subquery_name is omitted for the topmost
+ * query level. Whenever an item is omitted, the preceding punctuation mark
+ * is also omitted.  Identifier-style escaping is applied to alias_name and
+ * subquery_name.  Whenever we include child_table_name, we always emit the
+ * schema-qualified name, but users writing their own plan advice are not
+ * required to do so.  Identifier-style escaping is applied to the schema and
+ * to the relation names separately.
+ *
+ * The upshot of all of these rules is that in simple cases, the relation
+ * identifier is textually identical to the alias name, making life easier
+ * for users. However, even in complex cases, every relation identifier
+ * for a given query will be unique (or at least we hope so: if not, this
+ * code is buggy and the identifier format might need to be rethought).
+ *
+ * A key goal of this system is that we want to be able to reconstruct the
+ * same identifiers during a future planning cycle for the same query, so
+ * that if a certain behavior is specified for a certain identifier, we can
+ * properly identify the RTI for which that behavior is mandated. In order
+ * for this to work, subquery names must be unique and known before the
+ * subquery is planned, and the remainder of the identifier must not depend
+ * on any part of the query outside of the current subquery level. In
+ * particular, occurrence_number must be calculated relative to the range
+ * table for the relevant subquery, not the final flattened range table.
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_identifier.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pgpa_identifier.h"
+
+#include "parser/parsetree.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+
+static Index *pgpa_create_top_rti_map(Index rtable_length, List *rtable,
+                                                                         List *appinfos);
+static int     pgpa_occurrence_number(List *rtable, Index *top_rti_map,
+                                                                  SubPlanRTInfo *rtinfo, Index rti);
+
+/*
+ * Convert a range table identifier into its textual form.
+ *
+ * The result looks like alias_name#occurrence/partnsp.partrel@plan_name,
+ * where "#occurrence" appears only when the occurrence number is greater
+ * than one, the "/..." portion appears only when partrel is not NULL, the
+ * "partnsp." qualification appears only when partnsp is not NULL, and
+ * "@plan_name" appears only when plan_name is not NULL. Every name is passed
+ * through quote_identifier().
+ *
+ * This function leaves the caller to do all the heavy lifting of filling in
+ * the pgpa_identifier, so it's generally better to use one of the functions
+ * below to do that.
+ *
+ * See the file header comments for more details on the format of an
+ * identifier.
+ */
+const char *
+pgpa_identifier_string(const pgpa_identifier *rid)
+{
+       const char *result;
+
+       Assert(rid->alias_name != NULL);
+       result = quote_identifier(rid->alias_name);
+
+       Assert(rid->occurrence >= 0);
+       if (rid->occurrence > 1)
+               result = psprintf("%s#%d", result, rid->occurrence);
+
+       if (rid->partrel != NULL)
+       {
+               /*
+                * The schema may be absent, in which case we print just the relation
+                * name. Be careful to quote partrel here, not the NULL partnsp.
+                */
+               if (rid->partnsp == NULL)
+                       result = psprintf("%s/%s", result,
+                                                         quote_identifier(rid->partrel));
+               else
+                       result = psprintf("%s/%s.%s", result,
+                                                         quote_identifier(rid->partnsp),
+                                                         quote_identifier(rid->partrel));
+       }
+
+       if (rid->plan_name != NULL)
+               result = psprintf("%s@%s", result, quote_identifier(rid->plan_name));
+
+       return result;
+}
+
+/*
+ * Compute a relation identifier for a particular RTI.
+ *
+ * The caller provides root and rti, and gets the components of the
+ * identifier back via *rid. rti must not refer to an RTE_JOIN entry.
+ *
+ * The occurrence number is computed relative to root's range table, and the
+ * plan name is taken from root->plan_name. Note that rid->alias_name points
+ * at the alias stored in the range table entry rather than at a copy.
+ */
+void
+pgpa_compute_identifier_by_rti(PlannerInfo *root, Index rti,
+                                                          pgpa_identifier *rid)
+{
+       Index           top_rti = rti;
+       int                     occurrence = 1;
+       RangeTblEntry *rte;
+       RangeTblEntry *top_rte;
+       char       *partnsp = NULL;
+       char       *partrel = NULL;
+
+       /*
+        * If this is a child RTE, find the topmost parent that is still of type
+        * RTE_RELATION. We do this because we identify children of partitioned
+        * tables by the name of the child table, but subqueries can also have
+        * child rels and we don't care about those here.
+        */
+       for (;;)
+       {
+               AppendRelInfo *appinfo;
+               RangeTblEntry *parent_rte;
+
+               /* append_rel_array can be NULL if there are no children */
+               if (root->append_rel_array == NULL ||
+                       (appinfo = root->append_rel_array[top_rti]) == NULL)
+                       break;
+
+               parent_rte = planner_rt_fetch(appinfo->parent_relid, root);
+               if (parent_rte->rtekind != RTE_RELATION)
+                       break;
+
+               top_rti = appinfo->parent_relid;
+       }
+
+       /* Get the range table entries for the RTI and top RTI. */
+       rte = planner_rt_fetch(rti, root);
+       top_rte = planner_rt_fetch(top_rti, root);
+       Assert(rte->rtekind != RTE_JOIN);
+       Assert(top_rte->rtekind != RTE_JOIN);
+
+       /*
+        * Work out the correct occurrence number by counting earlier range table
+        * entries that would be given the same alias name.
+        */
+       for (Index prior_rti = 1; prior_rti < top_rti; ++prior_rti)
+       {
+               RangeTblEntry *prior_rte;
+               AppendRelInfo *appinfo;
+
+               /*
+                * If this is a child rel of a parent that is a relation, skip it.
+                *
+                * Such range table entries are disambiguated by mentioning the schema
+                * and name of the table, not by counting them as separate occurrences
+                * of the same table.
+                *
+                * NB: append_rel_array can be NULL if there are no children
+                */
+               if (root->append_rel_array != NULL &&
+                       (appinfo = root->append_rel_array[prior_rti]) != NULL)
+               {
+                       RangeTblEntry *parent_rte;
+
+                       parent_rte = planner_rt_fetch(appinfo->parent_relid, root);
+                       if (parent_rte->rtekind == RTE_RELATION)
+                               continue;
+               }
+
+               /* Skip NULL entries and joins. */
+               prior_rte = planner_rt_fetch(prior_rti, root);
+               if (prior_rte == NULL || prior_rte->rtekind == RTE_JOIN)
+                       continue;
+
+               /*
+                * Skip if the alias name differs. The comparison is made against the
+                * top RTE's alias, because that is the alias_name we store in the
+                * identifier and therefore the name that needs disambiguating.
+                */
+               if (strcmp(prior_rte->eref->aliasname,
+                                  top_rte->eref->aliasname) != 0)
+                       continue;
+
+               /* Looks like a true duplicate. */
+               ++occurrence;
+       }
+
+       /* If this is a child table, get the schema and relation names. */
+       if (rti != top_rti)
+       {
+               partnsp = get_namespace_name_or_temp(get_rel_namespace(rte->relid));
+               partrel = get_rel_name(rte->relid);
+       }
+
+       /* OK, we have all the answers we need. Return them to the caller. */
+       rid->alias_name = top_rte->eref->aliasname;
+       rid->occurrence = occurrence;
+       rid->partnsp = partnsp;
+       rid->partrel = partrel;
+       rid->plan_name = root->plan_name;
+}
+
+/*
+ * Fill in relation identifiers for every member of a set of RTIs, leaving
+ * out any members that are RTE_JOIN entries.
+ *
+ * RTE_JOIN entries are left out because plan advice cannot mention them.
+ *
+ * Identifiers are written to consecutive slots of the rids array, starting
+ * at index zero. The caller is responsible for making sure that the rids
+ * array is large enough to store the results.
+ *
+ * The return value is the number of identifiers computed.
+ */
+int
+pgpa_compute_identifiers_by_relids(PlannerInfo *root, Bitmapset *relids,
+                                                                  pgpa_identifier *rids)
+{
+       int                     nfilled = 0;
+       int                     member;
+
+       for (member = bms_next_member(relids, -1);
+                member >= 0;
+                member = bms_next_member(relids, member))
+       {
+               RangeTblEntry *member_rte = planner_rt_fetch(member, root);
+
+               /* Only non-join members consume a slot in the output array. */
+               if (member_rte->rtekind != RTE_JOIN)
+               {
+                       pgpa_compute_identifier_by_rti(root, member, &rids[nfilled]);
+                       nfilled++;
+               }
+       }
+
+       Assert(nfilled > 0);
+       return nfilled;
+}
+
+/*
+ * Create an array of range table identifiers for all the non-NULL,
+ * non-RTE_JOIN entries in the PlannedStmt's range table.
+ *
+ * The returned array is zero-based: the identifier for RTI n is stored at
+ * index n - 1. Slots that correspond to NULL or RTE_JOIN range table entries
+ * are left zeroed, so their alias_name is NULL.
+ */
+pgpa_identifier *
+pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt)
+{
+       Index           rtable_length = list_length(pstmt->rtable);
+       pgpa_identifier *result = palloc0_array(pgpa_identifier, rtable_length);
+       Index      *top_rti_map;
+       int                     rtinfoindex = 0;
+       SubPlanRTInfo *rtinfo = NULL;
+       SubPlanRTInfo *nextrtinfo = NULL;
+
+       /*
+        * Account for relations added by inheritance expansion of partitioned
+        * tables.
+        */
+       top_rti_map = pgpa_create_top_rti_map(rtable_length, pstmt->rtable,
+                                                                                 pstmt->appendRelations);
+
+       /*
+        * When we begin iterating, we're processing the portion of the range
+        * table that originated from the top-level PlannerInfo, so rtinfo is
+        * NULL. Later, rtinfo will be the SubPlanRTInfo for the subquery whose
+        * portion of the range table we are processing. nextrtinfo is always the
+        * SubPlanRTInfo that follows the current one, if any, so when we're
+        * processing the top-level query's portion of the range table, the next
+        * SubPlanRTInfo is the very first one.
+        */
+       if (pstmt->subrtinfos != NULL)
+               nextrtinfo = linitial(pstmt->subrtinfos);
+
+       /* Main loop over the range table. */
+       for (Index rti = 1; rti <= rtable_length; rti++)
+       {
+               const char *plan_name;
+               Index           top_rti;
+               RangeTblEntry *rte;
+               RangeTblEntry *top_rte;
+               char       *partnsp = NULL;
+               char       *partrel = NULL;
+               int                     occurrence;
+               pgpa_identifier *rid;
+
+               /*
+                * Advance to the next SubPlanRTInfo, if it's time to do that.
+                *
+                * This loop probably shouldn't ever iterate more than once, because
+                * that would imply that a subquery was planned but added nothing to
+                * the range table; but let's be defensive and assume it can happen.
+                */
+               while (nextrtinfo != NULL && rti > nextrtinfo->rtoffset)
+               {
+                       rtinfo = nextrtinfo;
+                       if (++rtinfoindex >= list_length(pstmt->subrtinfos))
+                               nextrtinfo = NULL;
+                       else
+                               nextrtinfo = list_nth(pstmt->subrtinfos, rtinfoindex);
+               }
+
+               /* Fetch the range table entry, if any. */
+               rte = rt_fetch(rti, pstmt->rtable);
+
+               /*
+                * We can't and don't need to identify null entries, and we don't want
+                * to identify join entries.
+                */
+               if (rte == NULL || rte->rtekind == RTE_JOIN)
+                       continue;
+
+               /*
+                * If this is not a relation added by partitioned table expansion,
+                * then the top RTI/RTE are just the same as this RTI/RTE. Otherwise,
+                * we need the information for the top RTI/RTE, and must also fetch
+                * the partition schema and name.
+                */
+               top_rti = top_rti_map[rti - 1];
+               if (rti == top_rti)
+                       top_rte = rte;
+               else
+               {
+                       top_rte = rt_fetch(top_rti, pstmt->rtable);
+                       partnsp =
+                               get_namespace_name_or_temp(get_rel_namespace(rte->relid));
+                       partrel = get_rel_name(rte->relid);
+               }
+
+               /*
+                * Compute the correct occurrence number. Note that we pass top_rti,
+                * so occurrences are counted for the RTE whose alias we will use.
+                */
+               occurrence = pgpa_occurrence_number(pstmt->rtable, top_rti_map,
+                                                                                       rtinfo, top_rti);
+
+               /* Get the name of the current plan (NULL for toplevel query). */
+               plan_name = rtinfo == NULL ? NULL : rtinfo->plan_name;
+
+               /* Save all the details we've derived. */
+               rid = &result[rti - 1];
+               rid->alias_name = top_rte->eref->aliasname;
+               rid->occurrence = occurrence;
+               rid->partnsp = partnsp;
+               rid->partrel = partrel;
+               rid->plan_name = plan_name;
+       }
+
+       return result;
+}
+
+/*
+ * Look up a pgpa_identifier in the array of identifiers computed for the
+ * range table.
+ *
+ * rt_identifiers is indexed by RTI minus one and has rtable_length entries.
+ * The return value is the matching RTI if exactly one entry matches; if
+ * nothing matches, or if more than one entry matches, the result is 0.
+ */
+Index
+pgpa_compute_rti_from_identifier(int rtable_length,
+                                                                pgpa_identifier *rt_identifiers,
+                                                                pgpa_identifier *rid)
+{
+       Index           match = 0;
+       Index           rti;
+
+       for (rti = 1; rti <= rtable_length; ++rti)
+       {
+               pgpa_identifier *candidate = &rt_identifiers[rti - 1];
+
+               /* Some RTIs have no identifier at all; those can never match. */
+               if (candidate->alias_name == NULL)
+                       continue;
+
+               /* Alias name and occurrence number must agree exactly. */
+               if (strcmp(rid->alias_name, candidate->alias_name) != 0)
+                       continue;
+               if (rid->occurrence != candidate->occurrence)
+                       continue;
+
+               /* A partition schema omitted on either side matches anything. */
+               if (rid->partnsp != NULL && candidate->partnsp != NULL &&
+                       strcmp(rid->partnsp, candidate->partnsp) != 0)
+                       continue;
+
+               /*
+                * Partition and plan names must either match exactly or be omitted
+                * on both sides.
+                */
+               if (!strings_equal_or_both_null(rid->partrel, candidate->partrel))
+                       continue;
+               if (!strings_equal_or_both_null(rid->plan_name, candidate->plan_name))
+                       continue;
+
+               /* A second match means the identifier is ambiguous. */
+               if (match != 0)
+                       return 0;
+
+               match = rti;
+       }
+
+       return match;
+}
+
+/*
+ * Build a mapping from each RTI to the RTI whose alias_name will be used to
+ * construct the range table identifier.
+ *
+ * A child relation maps to its topmost parent that is still of type
+ * RTE_RELATION. Every other relation maps to itself.
+ *
+ * Every RTI in the range table will eventually need this information, so we
+ * compute all of the answers in one pass over the AppendRelInfo list rather
+ * than searching that list repeatedly for entries of interest.
+ *
+ * Note that the returned array uses zero-based indexing, while RTIs use
+ * 1-based indexing, so subtract 1 from the RTI before looking it up in the
+ * array.
+ */
+static Index *
+pgpa_create_top_rti_map(Index rtable_length, List *rtable, List *appinfos)
+{
+       Index      *map = palloc0_array(Index, rtable_length);
+       Index           slot;
+
+       /* Begin with the identity mapping: array slot i describes RTI i + 1. */
+       for (slot = 0; slot < rtable_length; ++slot)
+               map[slot] = slot + 1;
+
+       /* Now redirect each child of a plain relation to its parent's target. */
+       foreach_node(AppendRelInfo, appinfo, appinfos)
+       {
+               RangeTblEntry *parent_rte = rt_fetch(appinfo->parent_relid, rtable);
+
+               /*
+                * Only parents of type RTE_RELATION are of interest. Since parents
+                * always precede their children in the AppendRelInfo list, the
+                * parent's slot should already hold its final value, which the child
+                * can simply inherit.
+                */
+               if (parent_rte->rtekind == RTE_RELATION)
+                       map[appinfo->child_relid - 1] = map[appinfo->parent_relid - 1];
+       }
+
+       return map;
+}
+
+/*
+ * Find the occurrence number of a certain relation within a certain subquery.
+ *
+ * The same alias name can occur multiple times within a subquery, but we want
+ * to disambiguate by giving different occurrences different integer indexes.
+ * However, child tables are disambiguated by including the table name rather
+ * than by incrementing the occurrence number; and joins are not named and so
+ * shouldn't increment the occurrence number either.
+ *
+ * rtable is the range table to search and top_rti_map is the zero-based map
+ * from each RTI to its top RTI. rtinfo describes the subquery whose portion
+ * of the range table contains rti, or is NULL for the top-level query; only
+ * entries after rtinfo->rtoffset and before rti are examined.
+ *
+ * The result is 1 for the first occurrence of the alias, 2 for the second,
+ * and so on.
+ */
+static int
+pgpa_occurrence_number(List *rtable, Index *top_rti_map,
+                                          SubPlanRTInfo *rtinfo, Index rti)
+{
+       Index           rtoffset = (rtinfo == NULL) ? 0 : rtinfo->rtoffset;
+       int                     occurrence = 1;
+       RangeTblEntry *rte = rt_fetch(rti, rtable);
+
+       for (Index prior_rti = rtoffset + 1; prior_rti < rti; ++prior_rti)
+       {
+               RangeTblEntry *prior_rte;
+
+               /*
+                * If this is a child rel of a parent that is a relation, skip it.
+                *
+                * Such range table entries are disambiguated by mentioning the schema
+                * and name of the table, not by counting them as separate occurrences
+                * of the same table.
+                *
+                * We must keep scanning rather than stop here, since later entries
+                * may still be true duplicates of the alias we're looking for.
+                */
+               if (top_rti_map[prior_rti - 1] != prior_rti)
+                       continue;
+
+               /* Skip NULL entries and joins. */
+               prior_rte = rt_fetch(prior_rti, rtable);
+               if (prior_rte == NULL || prior_rte->rtekind == RTE_JOIN)
+                       continue;
+
+               /* Skip if the alias name differs. */
+               if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
+                       continue;
+
+               /* Looks like a true duplicate. */
+               ++occurrence;
+       }
+
+       return occurrence;
+}
diff --git a/contrib/pg_plan_advice/pgpa_identifier.h b/contrib/pg_plan_advice/pgpa_identifier.h
new file mode 100644 (file)
index 0000000..b000d2b
--- /dev/null
@@ -0,0 +1,52 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_identifier.h
+ *       create appropriate identifiers for range table entries
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_identifier.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef PGPA_IDENTIFIER_H
+#define PGPA_IDENTIFIER_H
+
+#include "nodes/pathnodes.h"
+#include "nodes/plannodes.h"
+
+/*
+ * The components of a range table identifier, whose textual form is
+ * alias_name#occurrence/partnsp.partrel@plan_name (see pgpa_identifier.c).
+ */
+typedef struct pgpa_identifier
+{
+       /* alias name; NULL marks an array slot that has no identifier */
+       const char *alias_name;
+       /* occurrence number; 1 for the first use of the alias in its subquery */
+       int                     occurrence;
+       /* schema of the child table; NULL if not a child table or not given */
+       const char *partnsp;
+       /* name of the child table; NULL if this is not a child table */
+       const char *partrel;
+       /* name of the subquery's plan; NULL for the top-level query */
+       const char *plan_name;
+} pgpa_identifier;
+
+/*
+ * Convenience function for comparing possibly-NULL strings: the result is
+ * true when both arguments are NULL or when both are non-NULL and have equal
+ * contents, and false otherwise.
+ */
+static inline bool
+strings_equal_or_both_null(const char *a, const char *b)
+{
+       /* Identical pointers (which covers NULL vs. NULL) need no strcmp. */
+       return a == b ||
+               (a != NULL && b != NULL && strcmp(a, b) == 0);
+}
+
+extern const char *pgpa_identifier_string(const pgpa_identifier *rid);
+extern void pgpa_compute_identifier_by_rti(PlannerInfo *root, Index rti,
+                                                                                  pgpa_identifier *rid);
+extern int     pgpa_compute_identifiers_by_relids(PlannerInfo *root,
+                                                                                          Bitmapset *relids,
+                                                                                          pgpa_identifier *rids);
+extern pgpa_identifier *pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt);
+
+extern Index pgpa_compute_rti_from_identifier(int rtable_length,
+                                                                                         pgpa_identifier *rt_identifiers,
+                                                                                         pgpa_identifier *rid);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_join.c b/contrib/pg_plan_advice/pgpa_join.c
new file mode 100644 (file)
index 0000000..2861876
--- /dev/null
@@ -0,0 +1,615 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_join.c
+ *       analysis of joins in Plan trees
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_join.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pgpa_join.h"
+#include "pgpa_scan.h"
+#include "pgpa_walker.h"
+
+#include "nodes/pathnodes.h"
+#include "nodes/print.h"
+#include "parser/parsetree.h"
+
+/*
+ * Temporary object used when unrolling a join tree.
+ *
+ * nallocated is the allocated length of each of the arrays below, and nused
+ * is the number of entries filled in so far; pgpa_unroll_join appends one
+ * entry per unrolled join and doubles the arrays when they are full.
+ *
+ * outer_subplan, outer_elided_node, and outer_beneath_any_gather describe
+ * the outermost input. pgpa_unroll_join sets them when it finds that the
+ * outer side of a join is not a further unrollable join.
+ *
+ * strategy, inner_subplans, inner_elided_nodes, inner_unrollers, and
+ * inner_beneath_any_gather are parallel arrays with one entry per join, in
+ * the order in which pgpa_unroll_join was called. inner_unrollers[n] is
+ * non-NULL only when the inner side of that join is itself a join, which is
+ * unrolled into a separate object.
+ */
+struct pgpa_join_unroller
+{
+       unsigned        nallocated;
+       unsigned        nused;
+       Plan       *outer_subplan;
+       ElidedNode *outer_elided_node;
+       bool            outer_beneath_any_gather;
+       pgpa_join_strategy *strategy;
+       Plan      **inner_subplans;
+       ElidedNode **inner_elided_nodes;
+       pgpa_join_unroller **inner_unrollers;
+       bool       *inner_beneath_any_gather;
+};
+
+static pgpa_join_strategy pgpa_decompose_join(pgpa_plan_walker_context *walker,
+                                                                                         Plan *plan,
+                                                                                         Plan **realouter,
+                                                                                         Plan **realinner,
+                                                                                         ElidedNode **elidedrealouter,
+                                                                                         ElidedNode **elidedrealinner,
+                                                                                         bool *found_any_outer_gather,
+                                                                                         bool *found_any_inner_gather);
+static ElidedNode *pgpa_descend_node(PlannedStmt *pstmt, Plan **plan);
+static ElidedNode *pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan,
+                                                                                  bool *found_any_gather);
+static bool pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan,
+                                                                       ElidedNode **elided_node);
+
+static bool is_result_node_with_child(Plan *plan);
+static bool is_sorting_plan(Plan *plan);
+
+/*
+ * Create an initially-empty object for unrolling joins.
+ *
+ * The result is a zero-filled pgpa_join_unroller whose per-join workspace
+ * arrays have room for four joins; pgpa_unroll_join enlarges them on demand.
+ * After one or more calls to pgpa_unroll_join, the object can be used to
+ * create a pgpa_unrolled_join.
+ */
+pgpa_join_unroller *
+pgpa_create_join_unroller(void)
+{
+       pgpa_join_unroller *unroller = palloc0_object(pgpa_join_unroller);
+       unsigned        initial_size = 4;
+
+       /* Every workspace array starts out with the same number of slots. */
+       unroller->nallocated = initial_size;
+       unroller->strategy = palloc_array(pgpa_join_strategy, initial_size);
+       unroller->inner_subplans = palloc_array(Plan *, initial_size);
+       unroller->inner_elided_nodes = palloc_array(ElidedNode *, initial_size);
+       unroller->inner_unrollers =
+               palloc_array(pgpa_join_unroller *, initial_size);
+       unroller->inner_beneath_any_gather = palloc_array(bool, initial_size);
+
+       return unroller;
+}
+
+/*
+ * Unroll one level of an unrollable join tree.
+ *
+ * Our basic goal here is to unroll join trees as they occur in the Plan
+ * tree into a simpler and more regular structure that we can more easily
+ * use for further processing. Unrolling is outer-deep, so if the plan tree
+ * has Join1(Join2(A,B),Join3(C,D)), the same join unroller object should be
+ * used for Join1 and Join2, but a different one will be needed for Join3,
+ * since that involves a join within the *inner* side of another join.
+ *
+ * pgpa_plan_walker creates a "top level" join unroller object when it
+ * encounters a join in a portion of the plan tree in which no join unroller
+ * is already active. From there, this function is responsible for determining
+ * to what portion of the plan tree that join unroller applies, and for
+ * creating any subordinate join unroller objects that are needed as a result
+ * of non-outer-deep join trees. We do this by returning the join unroller
+ * objects that should be used for further traversal of the outer and inner
+ * subtrees of the current plan node via *outer_join_unroller and
+ * *inner_join_unroller, respectively.
+ *
+ * Note that *outer_join_unroller is assigned only for pass-through nodes and
+ * for joins whose outer side is itself an unrollable join, and that
+ * *inner_join_unroller is assigned only when plan is a join. On every other
+ * path they are left untouched, so callers presumably initialize both to
+ * NULL before calling.
+ *
+ * beneath_any_gather tells us whether plan is already known to be beneath a
+ * Gather or Gather Merge node; it is combined with whatever we discover while
+ * decomposing the join and recorded for each side.
+ */
+void
+pgpa_unroll_join(pgpa_plan_walker_context *walker, Plan *plan,
+                                bool beneath_any_gather,
+                                pgpa_join_unroller *join_unroller,
+                                pgpa_join_unroller **outer_join_unroller,
+                                pgpa_join_unroller **inner_join_unroller)
+{
+       pgpa_join_strategy strategy;
+       Plan       *realinner,
+                          *realouter;
+       ElidedNode *elidedinner,
+                          *elidedouter;
+       int                     n;
+       bool            found_any_outer_gather = false;
+       bool            found_any_inner_gather = false;
+
+       Assert(join_unroller != NULL);
+
+       /*
+        * We need to pass the join_unroller object down through certain types of
+        * plan nodes -- anything that's considered part of the join strategy, and
+        * any other nodes that can occur in a join tree despite not being scans
+        * or joins.
+        *
+        * This includes:
+        *
+        * (1) Materialize, Memoize, and Hash nodes, which are part of the join
+        * strategy,
+        *
+        * (2) Gather and Gather Merge nodes, which can occur at any point in the
+        * join tree where the planner decided to initiate parallelism,
+        *
+        * (3) Sort and IncrementalSort nodes, which can occur beneath MergeJoin
+        * or GatherMerge,
+        *
+        * (4) Agg and Unique nodes, which can occur when we decide to make the
+        * nullable side of a semijoin unique and then join the result, and
+        *
+        * (5) Result nodes with children, which can be added either to project or
+        * to enforce a one-time filter (but Result nodes without children are
+        * degenerate scans or joins).
+        */
+       if (IsA(plan, Material) || IsA(plan, Memoize) || IsA(plan, Hash)
+               || IsA(plan, Gather) || IsA(plan, GatherMerge)
+               || is_sorting_plan(plan) || IsA(plan, Agg) || IsA(plan, Unique)
+               || is_result_node_with_child(plan))
+       {
+               *outer_join_unroller = join_unroller;
+               return;
+       }
+
+       /*
+        * Since we've already handled nodes that require pass-through treatment,
+        * this should be an unrollable join.
+        */
+       strategy = pgpa_decompose_join(walker, plan,
+                                                                  &realouter, &realinner,
+                                                                  &elidedouter, &elidedinner,
+                                                                  &found_any_outer_gather,
+                                                                  &found_any_inner_gather);
+
+       /* If our workspace is full, expand it. */
+       if (join_unroller->nused >= join_unroller->nallocated)
+       {
+               join_unroller->nallocated *= 2;
+               join_unroller->strategy =
+                       repalloc_array(join_unroller->strategy,
+                                                  pgpa_join_strategy,
+                                                  join_unroller->nallocated);
+               join_unroller->inner_subplans =
+                       repalloc_array(join_unroller->inner_subplans,
+                                                  Plan *,
+                                                  join_unroller->nallocated);
+               join_unroller->inner_elided_nodes =
+                       repalloc_array(join_unroller->inner_elided_nodes,
+                                                  ElidedNode *,
+                                                  join_unroller->nallocated);
+               join_unroller->inner_beneath_any_gather =
+                       repalloc_array(join_unroller->inner_beneath_any_gather,
+                                                  bool,
+                                                  join_unroller->nallocated);
+               join_unroller->inner_unrollers =
+                       repalloc_array(join_unroller->inner_unrollers,
+                                                  pgpa_join_unroller *,
+                                                  join_unroller->nallocated);
+       }
+
+       /*
+        * Since we're flattening outer-deep join trees, it follows that if the
+        * outer side is still an unrollable join, it should be unrolled into this
+        * same object. Otherwise, we've reached the limit of what we can unroll
+        * into this object and must remember the outer side as the final outer
+        * subplan.
+        */
+       if (elidedouter == NULL && pgpa_is_join(realouter))
+               *outer_join_unroller = join_unroller;
+       else
+       {
+               join_unroller->outer_subplan = realouter;
+               join_unroller->outer_elided_node = elidedouter;
+               join_unroller->outer_beneath_any_gather =
+                       beneath_any_gather || found_any_outer_gather;
+       }
+
+       /*
+        * Store the inner subplan. If it's an unrollable join, it needs to be
+        * flattened in turn, but into a new unroller object, not this one.
+        */
+       n = join_unroller->nused++;
+       join_unroller->strategy[n] = strategy;
+       join_unroller->inner_subplans[n] = realinner;
+       join_unroller->inner_elided_nodes[n] = elidedinner;
+       join_unroller->inner_beneath_any_gather[n] =
+               beneath_any_gather || found_any_inner_gather;
+       if (elidedinner == NULL && pgpa_is_join(realinner))
+               *inner_join_unroller = pgpa_create_join_unroller();
+       else
+               *inner_join_unroller = NULL;
+       join_unroller->inner_unrollers[n] = *inner_join_unroller;
+}
+
+/*
+ * Use the data we've accumulated in a pgpa_join_unroller object to construct
+ * a pgpa_unrolled_join.
+ *
+ * walker is passed through to pgpa_build_scan for each member that is not
+ * itself an unrolled join. join_unroller must have been populated by at
+ * least one call to pgpa_unroll_join. The result and its arrays are newly
+ * allocated; join_unroller itself is neither modified nor freed. Nested
+ * unrollers recorded for inner members are converted recursively.
+ */
+pgpa_unrolled_join *
+pgpa_build_unrolled_join(pgpa_plan_walker_context *walker,
+                                                pgpa_join_unroller *join_unroller)
+{
+       pgpa_unrolled_join *ujoin;
+       int                     i;
+
+       /*
+        * We shouldn't have gone even so far as to create a join unroller unless
+        * we found at least one unrollable join.
+        */
+       Assert(join_unroller->nused > 0);
+
+       /* Allocate result structures. */
+       ujoin = palloc0_object(pgpa_unrolled_join);
+       ujoin->ninner = join_unroller->nused;
+       ujoin->strategy = palloc0_array(pgpa_join_strategy, join_unroller->nused);
+       ujoin->inner = palloc0_array(pgpa_join_member, join_unroller->nused);
+
+       /* Handle the outermost join. */
+       ujoin->outer.plan = join_unroller->outer_subplan;
+       ujoin->outer.elided_node = join_unroller->outer_elided_node;
+       ujoin->outer.scan =
+               pgpa_build_scan(walker, ujoin->outer.plan,
+                                               ujoin->outer.elided_node,
+                                               join_unroller->outer_beneath_any_gather,
+                                               true);
+
+       /*
+        * We want the joins from the deepest part of the plan tree to appear
+        * first in the result object, but the join unroller adds them in exactly
+        * the reverse of that order, so we need to flip the order of the arrays
+        * when constructing the final result.
+        */
+       for (i = 0; i < join_unroller->nused; ++i)
+       {
+               int                     k = join_unroller->nused - i - 1;
+
+               /* Copy strategy, Plan, and ElidedNode. */
+               ujoin->strategy[i] = join_unroller->strategy[k];
+               ujoin->inner[i].plan = join_unroller->inner_subplans[k];
+               ujoin->inner[i].elided_node = join_unroller->inner_elided_nodes[k];
+
+               /*
+                * Fill in remaining details, using either the nested join unroller,
+                * or by deriving them from the plan and elided nodes.
+                *
+                * Every unroller array, including inner_beneath_any_gather, must be
+                * read at the reversed index k so that the gather flag belongs to
+                * the same join as the plan and elided node copied just above.
+                */
+               if (join_unroller->inner_unrollers[k] != NULL)
+                       ujoin->inner[i].unrolled_join =
+                               pgpa_build_unrolled_join(walker,
+                                                                                join_unroller->inner_unrollers[k]);
+               else
+                       ujoin->inner[i].scan =
+                               pgpa_build_scan(walker, ujoin->inner[i].plan,
+                                                               ujoin->inner[i].elided_node,
+                                                               join_unroller->inner_beneath_any_gather[k],
+                                                               true);
+       }
+
+       return ujoin;
+}
+
+/*
+ * Free memory allocated for pgpa_join_unroller.
+ *
+ * This releases the unroller object and each of its workspace arrays. Any
+ * nested unroller objects referenced by inner_unrollers[] are not freed here.
+ */
+void
+pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller)
+{
+       pfree(join_unroller->strategy);
+       pfree(join_unroller->inner_subplans);
+       pfree(join_unroller->inner_elided_nodes);
+       pfree(join_unroller->inner_unrollers);
+       pfree(join_unroller->inner_beneath_any_gather);
+       pfree(join_unroller);
+}
+
+/*
+ * Identify the join strategy used by a join and the "real" inner and outer
+ * plans.
+ *
+ * For example, a Hash Join always has a Hash node on the inner side, but
+ * for all intents and purposes the real inner input is the Hash node's child,
+ * not the Hash node itself.
+ *
+ * Likewise, a Merge Join may have a Sort node on the inner or outer side; if
+ * it does, the real input to the join is the Sort node's child, not the
+ * Sort node itself.
+ *
+ * In addition, with a Merge Join or a Nested Loop, the join planning code
+ * may add additional nodes such as Materialize or Memoize. We regard these
+ * as an aspect of the join strategy. As in the previous cases, the true input
+ * to the join is the underlying node.
+ *
+ * However, if any involved child node previously had a now-elided node stacked
+ * on top, then we can't "look through" that node -- indeed, what's going to be
+ * relevant for our purposes is the ElidedNode on top of that plan node, rather
+ * than the plan node itself.
+ *
+ * If there are multiple elided nodes, we want the one that would have been
+ * uppermost in the plan tree prior to setrefs processing; we expect to find
+ * that one last in the list of elided nodes.
+ *
+ * plan must be a MergeJoin, NestLoop, or HashJoin; any other node type
+ * raises an error.
+ *
+ * On return *realouter and *realinner will have been set to the real outer
+ * and real inner plans that we identified, and *elidedrealouter and
+ * *elidedrealinner to the last of any corresponding elided nodes.
+ * Additionally, *found_any_outer_gather and *found_any_inner_gather will
+ * be set to true if we looked through a Gather or Gather Merge node on
+ * that side of the join, and false otherwise. The return value is the
+ * join strategy.
+ */
+static pgpa_join_strategy
+pgpa_decompose_join(pgpa_plan_walker_context *walker, Plan *plan,
+                                       Plan **realouter, Plan **realinner,
+                                       ElidedNode **elidedrealouter, ElidedNode **elidedrealinner,
+                                       bool *found_any_outer_gather, bool *found_any_inner_gather)
+{
+       PlannedStmt *pstmt = walker->pstmt;
+       JoinType        jointype = ((Join *) plan)->jointype;
+       Plan       *outerplan = plan->lefttree;
+       Plan       *innerplan = plan->righttree;
+       ElidedNode *elidedouter;
+       ElidedNode *elidedinner;
+       pgpa_join_strategy strategy;
+       bool            uniqueouter;
+       bool            uniqueinner;
+
+       elidedouter = pgpa_last_elided_node(pstmt, outerplan);
+       elidedinner = pgpa_last_elided_node(pstmt, innerplan);
+       *found_any_outer_gather = false;
+       *found_any_inner_gather = false;
+
+       switch (nodeTag(plan))
+       {
+               case T_MergeJoin:
+
+                       /*
+                        * The planner may have chosen to place a Material node on the
+                        * inner side of the MergeJoin; if this is present, we record it
+                        * as part of the join strategy.
+                        */
+                       if (elidedinner == NULL && IsA(innerplan, Material))
+                       {
+                               elidedinner = pgpa_descend_node(pstmt, &innerplan);
+                               strategy = JSTRAT_MERGE_JOIN_MATERIALIZE;
+                       }
+                       else
+                               strategy = JSTRAT_MERGE_JOIN_PLAIN;
+
+                       /*
+                        * For a MergeJoin, either the outer or the inner subplan, or
+                        * both, may have needed to be sorted; we must disregard any Sort
+                        * or IncrementalSort node to find the real inner or outer
+                        * subplan.
+                        */
+                       if (elidedouter == NULL && is_sorting_plan(outerplan))
+                               elidedouter = pgpa_descend_node(pstmt, &outerplan);
+                       if (elidedinner == NULL && is_sorting_plan(innerplan))
+                               elidedinner = pgpa_descend_node(pstmt, &innerplan);
+                       break;
+
+               case T_NestLoop:
+
+                       /*
+                        * The planner may have chosen to place a Material or Memoize node
+                        * on the inner side of the NestLoop; if this is present, we
+                        * record it as part of the join strategy.
+                        */
+                       if (elidedinner == NULL && IsA(innerplan, Material))
+                       {
+                               elidedinner = pgpa_descend_node(pstmt, &innerplan);
+                               strategy = JSTRAT_NESTED_LOOP_MATERIALIZE;
+                       }
+                       else if (elidedinner == NULL && IsA(innerplan, Memoize))
+                       {
+                               elidedinner = pgpa_descend_node(pstmt, &innerplan);
+                               strategy = JSTRAT_NESTED_LOOP_MEMOIZE;
+                       }
+                       else
+                               strategy = JSTRAT_NESTED_LOOP_PLAIN;
+                       break;
+
+               case T_HashJoin:
+
+                       /*
+                        * The inner subplan of a HashJoin is always a Hash node; the real
+                        * inner subplan is the Hash node's child.
+                        */
+                       Assert(IsA(innerplan, Hash));
+                       Assert(elidedinner == NULL);
+                       elidedinner = pgpa_descend_node(pstmt, &innerplan);
+                       strategy = JSTRAT_HASH_JOIN;
+                       break;
+
+               default:
+                       elog(ERROR, "unrecognized node type: %d", (int) nodeTag(plan));
+       }
+
+       /*
+        * The planner may have decided to implement a semijoin by first making
+        * the nullable side of the plan unique, and then performing a normal join
+        * against the result. Therefore, we might need to descend through a
+        * unique node on either side of the plan.
+        */
+       uniqueouter = pgpa_descend_any_unique(pstmt, &outerplan, &elidedouter);
+       uniqueinner = pgpa_descend_any_unique(pstmt, &innerplan, &elidedinner);
+
+       /*
+        * The planner may have decided to parallelize part of the join tree, so
+        * we could find a Gather or Gather Merge node here. Note that, if
+        * present, this will appear below nodes we considered as part of the join
+        * strategy, but we could find another uniqueness-enforcing node below the
+        * Gather or Gather Merge, if present.
+        *
+        * The found_any_*_gather arguments are pointers, so they must be
+        * dereferenced here: testing the pointer itself would always be true and
+        * would make us look for a second uniqueness-enforcing node even when no
+        * Gather or Gather Merge was found.
+        */
+       if (elidedouter == NULL)
+       {
+               elidedouter = pgpa_descend_any_gather(pstmt, &outerplan,
+                                                                                         found_any_outer_gather);
+               if (*found_any_outer_gather &&
+                       pgpa_descend_any_unique(pstmt, &outerplan, &elidedouter))
+                       uniqueouter = true;
+       }
+       if (elidedinner == NULL)
+       {
+               elidedinner = pgpa_descend_any_gather(pstmt, &innerplan,
+                                                                                         found_any_inner_gather);
+               if (*found_any_inner_gather &&
+                       pgpa_descend_any_unique(pstmt, &innerplan, &elidedinner))
+                       uniqueinner = true;
+       }
+
+       /*
+        * It's possible that a Result node has been inserted either to project a
+        * target list or to implement a one-time filter. If so, we can descend
+        * through it. Note that a result node without a child would be a
+        * degenerate scan or join, and not something we could descend through.
+        *
+        * XXX. I suspect it's possible for this to happen above the Gather or
+        * Gather Merge node, too, but apparently we have no test case for that
+        * scenario.
+        */
+       if (elidedouter == NULL && is_result_node_with_child(outerplan))
+               elidedouter = pgpa_descend_node(pstmt, &outerplan);
+       if (elidedinner == NULL && is_result_node_with_child(innerplan))
+               elidedinner = pgpa_descend_node(pstmt, &innerplan);
+
+       /*
+        * If this is a semijoin that was converted to an inner join by making one
+        * side or the other unique, make a note that the inner or outer subplan,
+        * as appropriate, should be treated as a query plan feature when the main
+        * tree traversal reaches it.
+        *
+        * Conversely, if the planner could have made one side of the join unique
+        * and thereby converted it to an inner join, and chose not to do so, that
+        * is also worth noting.
+        *
+        * XXX: We admit too much non-unique advice, as in the following example
+        * from the regression tests: EXPLAIN (PLAN_ADVICE, COSTS OFF) DELETE FROM
+        * prt1_l WHERE EXISTS (SELECT 1 FROM int4_tbl, LATERAL (SELECT
+        * int4_tbl.f1 FROM int8_tbl LIMIT 2) ss WHERE prt1_l.c IS NULL). We emit
+        * SEMIJOIN_NON_UNIQUE((int4_tbl ss)) but create_unique_path() fails in
+        * this case, so there's no sj-unique version possible.
+        *
+        * NB: This code could appear slightly higher up in this function, but
+        * none of the nodes through which we just descended should have
+        * associated RTIs.
+        *
+        * NB: This seems like a somewhat hacky way of passing information up to
+        * the main tree walk, but I don't currently have a better idea.
+        */
+       if (uniqueouter)
+               pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_UNIQUE, outerplan);
+       else if (jointype == JOIN_RIGHT_SEMI)
+               pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_NON_UNIQUE, outerplan);
+       if (uniqueinner)
+               pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_UNIQUE, innerplan);
+       else if (jointype == JOIN_SEMI)
+               pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_NON_UNIQUE, innerplan);
+
+       /* Set output parameters. */
+       *realouter = outerplan;
+       *realinner = innerplan;
+       *elidedrealouter = elidedouter;
+       *elidedrealinner = elidedinner;
+       return strategy;
+}
+
+/*
+ * Step from *plan down to its outer (lefttree) child.
+ *
+ * The caller is expected to have decided that the current node is of no
+ * interest in its own right. On return, *plan points to the child, and the
+ * result is whatever pgpa_last_elided_node reports for that child.
+ */
+static ElidedNode *
+pgpa_descend_node(PlannedStmt *pstmt, Plan **plan)
+{
+       Plan       *child = (*plan)->lefttree;
+
+       *plan = child;
+       return pgpa_last_elided_node(pstmt, child);
+}
+
+/*
+ * Look through a Gather or Gather Merge node at *plan, if there is one.
+ *
+ * Beneath a Gather Merge, a Sort or IncrementalSort child is looked through
+ * as well, provided that no elided node pertains to it.
+ *
+ * The caller must already have checked that no ElidedNode pertains to the
+ * incoming value of *plan.
+ *
+ * If *plan is neither kind of node, nothing is changed and NULL is returned.
+ * Otherwise *plan is advanced, *found_any_gather is set to true, and the
+ * result is the last of any elided nodes pertaining to the new *plan.
+ */
+static ElidedNode *
+pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan,
+                                               bool *found_any_gather)
+{
+       bool            is_gather = IsA(*plan, Gather);
+       bool            is_gather_merge = IsA(*plan, GatherMerge);
+       ElidedNode *elided;
+
+       /* Nothing to do unless parallelism starts at this node. */
+       if (!is_gather && !is_gather_merge)
+               return NULL;
+
+       elided = pgpa_descend_node(pstmt, plan);
+
+       /* Only Gather Merge can have a sorting node that belongs to it. */
+       if (is_gather_merge && elided == NULL && is_sorting_plan(*plan))
+               elided = pgpa_descend_node(pstmt, plan);
+
+       *found_any_gather = true;
+       return elided;
+}
+
+/*
+ * Look through an Agg or Unique node at *plan, unless an elided node
+ * corresponds to it. If the node we reach is a Sort or IncrementalSort with
+ * no corresponding elided node, look through that too.
+ *
+ * On entry, *elided_node must be the last of any elided nodes corresponding
+ * to *plan; that remains true on exit, even if *plan has been advanced.
+ *
+ * We refuse to cross elided nodes because a single join tree never spans
+ * one: subqueries are planned separately, and planning inside an Append or
+ * MergeAppend is separate from planning outside of it.
+ *
+ * Returns true if we descended through at least one node, and false if
+ * *plan was left alone.
+ */
+static bool
+pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan,
+                                               ElidedNode **elided_node)
+{
+       ElidedNode *elided;
+
+       /* Never look through a node that has an elided node on top of it. */
+       if (*elided_node != NULL)
+               return false;
+
+       /* Only uniqueness-enforcing nodes are of interest here. */
+       if (!IsA(*plan, Agg) && !IsA(*plan, Unique))
+               return false;
+
+       /* Step past the Agg or Unique, then past a sort feeding it, if any. */
+       elided = pgpa_descend_node(pstmt, plan);
+       if (elided == NULL && is_sorting_plan(*plan))
+               elided = pgpa_descend_node(pstmt, plan);
+       *elided_node = elided;
+
+       return true;
+}
+
+/*
+ * Report whether plan is a Result node that has an outer (lefttree) child.
+ */
+static bool
+is_result_node_with_child(Plan *plan)
+{
+       if (!IsA(plan, Result))
+               return false;
+
+       return plan->lefttree != NULL;
+}
+
+/*
+ * Is this a Plan node whose purpose is to put the data in a certain order?
+ *
+ * That is, is it a Sort or an IncrementalSort?
+ */
+static bool
+is_sorting_plan(Plan *plan)
+{
+       switch (nodeTag(plan))
+       {
+               case T_Sort:
+               case T_IncrementalSort:
+                       return true;
+               default:
+                       return false;
+       }
+}
diff --git a/contrib/pg_plan_advice/pgpa_join.h b/contrib/pg_plan_advice/pgpa_join.h
new file mode 100644 (file)
index 0000000..4dc7298
--- /dev/null
@@ -0,0 +1,105 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_join.h
+ *       analysis of joins in Plan trees
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_join.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_JOIN_H
+#define PGPA_JOIN_H
+
+#include "nodes/plannodes.h"
+
+typedef struct pgpa_plan_walker_context pgpa_plan_walker_context;
+typedef struct pgpa_join_unroller pgpa_join_unroller;
+typedef struct pgpa_unrolled_join pgpa_unrolled_join;
+
+/*
+ * Although there are three main join strategies, we try to classify things
+ * more precisely here: merge joins have the option of using materialization
+ * on the inner side, and nested loops can use either materialization or
+ * memoization.
+ */
+typedef enum
+{
+       JSTRAT_MERGE_JOIN_PLAIN = 0,    /* MergeJoin, no Material looked through */
+       JSTRAT_MERGE_JOIN_MATERIALIZE,  /* MergeJoin with Material on inner side */
+       JSTRAT_NESTED_LOOP_PLAIN,       /* NestLoop, no Material/Memoize looked through */
+       JSTRAT_NESTED_LOOP_MATERIALIZE, /* NestLoop with Material on inner side */
+       JSTRAT_NESTED_LOOP_MEMOIZE,     /* NestLoop with Memoize on inner side */
+       JSTRAT_HASH_JOIN                /* HashJoin (inner side is always a Hash node) */
+       /* update NUM_PGPA_JOIN_STRATEGY if you add anything here */
+} pgpa_join_strategy;
+
+#define NUM_PGPA_JOIN_STRATEGY         ((int) JSTRAT_HASH_JOIN + 1)
+
+/*
+ * In an outer-deep join tree, every member of an unrolled join will be a scan,
+ * but join trees with other shapes can contain unrolled joins.
+ *
+ * The plan node we store here will be the inner or outer child of the join
+ * node, as appropriate, except that we look through subnodes that we regard as
+ * part of the join method itself. For instance, for a Nested Loop that
+ * materializes the inner input, we'll store the child of the Materialize node,
+ * not the Materialize node itself.
+ *
+ * If setrefs processing elided one or more nodes from the plan tree, then
+ * we'll store details about the topmost of those in elided_node; otherwise,
+ * it will be NULL.
+ *
+ * Exactly one of scan and unrolled_join will be non-NULL.
+ */
+typedef struct
+{
+       Plan       *plan;                       /* join input, after looking through subnodes */
+       ElidedNode *elided_node;        /* topmost elided node for plan, or NULL */
+       struct pgpa_scan *scan;         /* set when this member is not an unrolled join */
+       pgpa_unrolled_join *unrolled_join;      /* set when this member is itself a join */
+} pgpa_join_member;
+
+/*
+ * We convert outer-deep join trees to a flat structure; that is, ((A JOIN B)
+ * JOIN C) JOIN D gets converted to outer = A, inner = <B C D>.  When joins
+ * aren't outer-deep, substructure is required, e.g. (A JOIN B) JOIN (C JOIN D)
+ * is represented as outer = A, inner = <B X>, where X is a pgpa_unrolled_join
+ * covering C-D.
+ */
+struct pgpa_unrolled_join
+{
+       /* Outermost member; must not itself be an unrolled join. */
+       pgpa_join_member outer;
+
+       /* Number of inner members. Length of the strategy and inner arrays. */
+       unsigned        ninner;
+
+       /*
+        * Array of strategies, one per non-outermost member. strategy[i] is the
+        * strategy of the join whose inner side is inner[i].
+        */
+       pgpa_join_strategy *strategy;
+
+       /* Array of members, excluding the outermost. Deepest first. */
+       pgpa_join_member *inner;
+};
+
+/*
+ * Does this plan node inherit from Join?
+ *
+ * That is, is it a NestLoop, a MergeJoin, or a HashJoin?
+ */
+static inline bool
+pgpa_is_join(Plan *plan)
+{
+       switch (nodeTag(plan))
+       {
+               case T_NestLoop:
+               case T_MergeJoin:
+               case T_HashJoin:
+                       return true;
+               default:
+                       return false;
+       }
+}
+
+extern pgpa_join_unroller *pgpa_create_join_unroller(void);
+extern void pgpa_unroll_join(pgpa_plan_walker_context *walker,
+                                                        Plan *plan, bool beneath_any_gather,
+                                                        pgpa_join_unroller *join_unroller,
+                                                        pgpa_join_unroller **outer_join_unroller,
+                                                        pgpa_join_unroller **inner_join_unroller);
+extern pgpa_unrolled_join *pgpa_build_unrolled_join(pgpa_plan_walker_context *walker,
+                                                                                                       pgpa_join_unroller *join_unroller);
+extern void pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_output.c b/contrib/pg_plan_advice/pgpa_output.c
new file mode 100644 (file)
index 0000000..89a675f
--- /dev/null
@@ -0,0 +1,628 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_output.c
+ *       produce textual output from the results of a plan tree walk
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_output.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pgpa_output.h"
+#include "pgpa_scan.h"
+
+#include "nodes/parsenodes.h"
+#include "parser/parsetree.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+
+/*
+ * Context object for textual advice generation.
+ *
+ * rid_strings holds the string form of each range table identifier, indexed
+ * by RTI - 1 (NULL if none). See the comments at the top of pgpa_identifier.c.
+ *
+ * buf is the caller-provided output buffer.
+ *
+ * wrap_column is the wrap column, so that we don't create output that is
+ * too wide. See pgpa_maybe_linebreak() and comments in pgpa_output_advice.
+ */
+typedef struct pgpa_output_context
+{
+       const char **rid_strings;
+       StringInfo      buf;
+       int                     wrap_column;
+} pgpa_output_context;
+
+static void pgpa_output_unrolled_join(pgpa_output_context *context,
+                                                                         pgpa_unrolled_join *join);
+static void pgpa_output_join_member(pgpa_output_context *context,
+                                                                       pgpa_join_member *member);
+static void pgpa_output_scan_strategy(pgpa_output_context *context,
+                                                                         pgpa_scan_strategy strategy,
+                                                                         List *scans);
+static void pgpa_output_bitmap_index_details(pgpa_output_context *context,
+                                                                                        Plan *plan);
+static void pgpa_output_relation_name(pgpa_output_context *context, Oid relid);
+static void pgpa_output_query_feature(pgpa_output_context *context,
+                                                                         pgpa_qf_type type,
+                                                                         List *query_features);
+static void pgpa_output_simple_strategy(pgpa_output_context *context,
+                                                                               char *strategy,
+                                                                               List *relid_sets);
+static void pgpa_output_no_gather(pgpa_output_context *context,
+                                                                 Bitmapset *relids);
+static void pgpa_output_relations(pgpa_output_context *context, StringInfo buf,
+                                                                 Bitmapset *relids);
+
+static char *pgpa_cstring_join_strategy(pgpa_join_strategy strategy);
+static char *pgpa_cstring_scan_strategy(pgpa_scan_strategy strategy);
+static char *pgpa_cstring_query_feature_type(pgpa_qf_type type);
+
+static void pgpa_maybe_linebreak(StringInfo buf, int wrap_column);
+
+/*
+ * Append query advice to the provided buffer.
+ *
+ * Before calling this function, 'walker' must be used to iterate over the
+ * main plan tree and all subplans from the PlannedStmt.
+ *
+ * 'rt_identifiers' is a table of unique identifiers, one for each RTI.
+ * See pgpa_create_identifiers_for_planned_stmt().
+ *
+ * Results will be appended to 'buf'.
+ */
+void
+pgpa_output_advice(StringInfo buf, pgpa_plan_walker_context *walker,
+                                  pgpa_identifier *rt_identifiers)
+{
+       Index           rtable_length = list_length(walker->pstmt->rtable);
+       ListCell   *lc;
+       pgpa_output_context context;
+
+       /* Basic initialization. */
+       memset(&context, 0, sizeof(pgpa_output_context));
+       context.buf = buf;
+
+       /*
+        * Convert identifiers to string form. Note that the loop variable here is
+        * not an RTI, because RTIs are 1-based. Some RTIs will have no
+        * identifier, either because the rtekind is RTE_JOIN or because that
+        * portion of the query didn't make it into the final plan.
+        */
+       context.rid_strings = palloc0_array(const char *, rtable_length);
+       for (int i = 0; i < rtable_length; ++i)
+               if (rt_identifiers[i].alias_name != NULL)
+                       context.rid_strings[i] = pgpa_identifier_string(&rt_identifiers[i]);
+
+       /*
+        * If the user chooses to use EXPLAIN (PLAN_ADVICE) in an 80-column window
+        * from a psql client with default settings, psql will add one space to
+        * the left of the output and EXPLAIN will add two more to the left of the
+        * advice. Thus, lines of more than 77 characters will wrap. We set the
+        * wrap limit to 76 here so that the output won't reach all the way to the
+        * very last column of the terminal.
+        *
+        * Of course, this is a fairly arbitrary set of assumptions, and one could
+        * well make an argument for a different wrap limit, or for a configurable
+        * one.
+        */
+       context.wrap_column = 76;
+
+       /*
+        * Each piece of JOIN_ORDER() advice fully describes the join order for
+        * a single unrolled join. Merging is not permitted, because that would
+        * change the meaning, e.g. SEQ_SCAN(a b c d) means simply that sequential
+        * scans should be used for all of those relations, and is thus equivalent
+        * to SEQ_SCAN(a b) SEQ_SCAN(c d), but JOIN_ORDER(a b c d) means that "a"
+        * is the driving table which is then joined to "b" then "c" then "d",
+        * which is totally different from JOIN_ORDER(a b) and JOIN_ORDER(c d).
+        */
+       foreach(lc, walker->toplevel_unrolled_joins)
+       {
+               pgpa_unrolled_join *ujoin = lfirst(lc);
+
+               if (buf->len > 0)
+                       appendStringInfoChar(buf, '\n');
+               appendStringInfo(context.buf, "JOIN_ORDER(");
+               pgpa_output_unrolled_join(&context, ujoin);
+               appendStringInfoChar(context.buf, ')');
+               pgpa_maybe_linebreak(context.buf, context.wrap_column);
+       }
+
+       /* Emit join strategy advice, one item per strategy that was used. */
+       for (int s = 0; s < NUM_PGPA_JOIN_STRATEGY; ++s)
+       {
+               char       *strategy = pgpa_cstring_join_strategy(s);
+
+               pgpa_output_simple_strategy(&context,
+                                                                       strategy,
+                                                                       walker->join_strategies[s]);
+       }
+
+       /*
+        * Emit scan strategy advice (but not for ordinary scans, which are
+        * definitionally uninteresting).
+        */
+       for (int c = 0; c < NUM_PGPA_SCAN_STRATEGY; ++c)
+               if (c != PGPA_SCAN_ORDINARY)
+                       pgpa_output_scan_strategy(&context, c, walker->scans[c]);
+
+       /* Emit query feature advice, one item per feature type that occurs. */
+       for (int t = 0; t < NUM_PGPA_QF_TYPES; ++t)
+               pgpa_output_query_feature(&context, t, walker->query_features[t]);
+
+       /* Emit NO_GATHER advice; this is always the last item in the output. */
+       pgpa_output_no_gather(&context, walker->no_gather_scans);
+}
+
+/*
+ * Emit the body of a JOIN_ORDER() item: the outermost member first, then
+ * every inner member in array order, separated by single spaces.
+ */
+static void
+pgpa_output_unrolled_join(pgpa_output_context *context,
+                                                 pgpa_unrolled_join *join)
+{
+       StringInfo      out = context->buf;
+
+       pgpa_output_join_member(context, &join->outer);
+       for (int i = 0; i < join->ninner; i++)
+       {
+               /* The gap before each inner member is a legal wrap point. */
+               pgpa_maybe_linebreak(out, context->wrap_column);
+               appendStringInfoChar(out, ' ');
+               pgpa_output_join_member(context, &join->inner[i]);
+       }
+}
+
+/*
+ * Emit one JOIN_ORDER() member: a nested join in parentheses, a scan of a
+ * single relation as a bare identifier, or a multi-relation scan in braces.
+ */
+static void
+pgpa_output_join_member(pgpa_output_context *context,
+                                               pgpa_join_member *member)
+{
+       StringInfo      out = context->buf;
+       bool            need_braces;
+
+       if (member->unrolled_join != NULL)
+       {
+               appendStringInfoChar(out, '(');
+               pgpa_output_unrolled_join(context, member->unrolled_join);
+               appendStringInfoChar(out, ')');
+               return;
+       }
+
+       /* Otherwise the member is a scan; brace it unless it's one relation. */
+       Assert(member->scan != NULL);
+       need_braces = (bms_membership(member->scan->relids) != BMS_SINGLETON);
+       if (need_braces)
+               appendStringInfoChar(out, '{');
+       pgpa_output_relations(context, out, member->scan->relids);
+       if (need_braces)
+               appendStringInfoChar(out, '}');
+}
+
+/*
+ * Emit one advice item covering every pgpa_scan in a List.
+ *
+ * All of the scans must use the strategy given by "strategy".
+ */
+static void
+pgpa_output_scan_strategy(pgpa_output_context *context,
+                                                 pgpa_scan_strategy strategy,
+                                                 List *scans)
+{
+       StringInfo      out = context->buf;
+       bool            need_separator = false;
+
+       if (scans == NIL)
+               return;
+
+       /* Every advice item begins on a line of its own. */
+       if (out->len > 0)
+               appendStringInfoChar(out, '\n');
+       appendStringInfo(out, "%s(", pgpa_cstring_scan_strategy(strategy));
+
+       foreach_ptr(pgpa_scan, scan, scans)
+       {
+               bool            multi = (bms_membership(scan->relids) != BMS_SINGLETON);
+
+               if (need_separator)
+               {
+                       pgpa_maybe_linebreak(out, context->wrap_column);
+                       appendStringInfoChar(out, ' ');
+               }
+               need_separator = true;
+
+               /* Relation identifiers, parenthesized when there are several. */
+               if (multi)
+                       appendStringInfoChar(out, '(');
+               pgpa_output_relations(context, out, scan->relids);
+               if (multi)
+                       appendStringInfoChar(out, ')');
+
+               /* Index-based strategies also name the index or indexes used. */
+               switch (strategy)
+               {
+                       case PGPA_SCAN_INDEX:
+                               Assert(IsA(scan->plan, IndexScan));
+                               pgpa_maybe_linebreak(out, context->wrap_column);
+                               appendStringInfoChar(out, ' ');
+                               pgpa_output_relation_name(context,
+                                                                                 ((IndexScan *) scan->plan)->indexid);
+                               break;
+                       case PGPA_SCAN_INDEX_ONLY:
+                               Assert(IsA(scan->plan, IndexOnlyScan));
+                               pgpa_maybe_linebreak(out, context->wrap_column);
+                               appendStringInfoChar(out, ' ');
+                               pgpa_output_relation_name(context,
+                                                                                 ((IndexOnlyScan *) scan->plan)->indexid);
+                               break;
+                       case PGPA_SCAN_BITMAP_HEAP:
+                               pgpa_maybe_linebreak(out, context->wrap_column);
+                               appendStringInfoChar(out, ' ');
+                               pgpa_output_bitmap_index_details(context, scan->plan->lefttree);
+                               break;
+                       default:
+                               break;
+               }
+       }
+
+       appendStringInfoChar(out, ')');
+       pgpa_maybe_linebreak(out, context->wrap_column);
+}
+
+/*
+ * Describe the index or indexes feeding a BitmapHeapScan.
+ *
+ * A lone index is emitted by name. A BitmapAnd over i1, i2, and i3 becomes
+ * &&(i1 i2 i3), and a BitmapOr over the same indexes becomes ||(i1 i2 i3).
+ */
+static void
+pgpa_output_bitmap_index_details(pgpa_output_context *context, Plan *plan)
+{
+       StringInfo      out = context->buf;
+       const char *opname;
+       List       *children;
+       int                     nemitted = 0;
+
+       /* Classify the node; only the three bitmap node types are expected. */
+       switch (nodeTag(plan))
+       {
+               case T_BitmapIndexScan:
+                       /* Leaf case: just the index name, no operator wrapper. */
+                       pgpa_output_relation_name(context,
+                                                                         ((BitmapIndexScan *) plan)->indexid);
+                       return;
+               case T_BitmapAnd:
+                       opname = "&&";
+                       children = ((BitmapAnd *) plan)->bitmapplans;
+                       break;
+               case T_BitmapOr:
+                       opname = "||";
+                       children = ((BitmapOr *) plan)->bitmapplans;
+                       break;
+               default:
+                       elog(ERROR, "unexpected node type: %d", (int) nodeTag(plan));
+       }
+
+       /* Operator first, then the recursively rendered children. */
+       appendStringInfo(out, "%s(", opname);
+       foreach_ptr(Plan, child_plan, children)
+       {
+               /* Children after the first get a wrap point and a space. */
+               if (nemitted++ > 0)
+               {
+                       pgpa_maybe_linebreak(out, context->wrap_column);
+                       appendStringInfoChar(out, ' ');
+               }
+               pgpa_output_bitmap_index_details(context, child_plan);
+       }
+       appendStringInfoChar(out, ')');
+}
+
+/* Output a schema-qualified relation name; ERROR if relid is unknown. */
+static void
+pgpa_output_relation_name(pgpa_output_context *context, Oid relid)
+{
+       char       *relname = get_rel_name(relid);
+       char       *relnamespace;
+
+       /* Failed lookups yield NULL, which quote_identifier() can't accept. */
+       relnamespace = get_namespace_name_or_temp(get_rel_namespace(relid));
+       if (relname == NULL || relnamespace == NULL)
+               elog(ERROR, "cache lookup failed for relation %u", relid);
+       appendStringInfo(context->buf, "%s.", quote_identifier(relnamespace));
+       appendStringInfoString(context->buf, quote_identifier(relname));
+}
+
+/*
+ * Emit one advice item covering every pgpa_query_feature in a List.
+ *
+ * Every feature in the list must be of the type given by "type".
+ */
+static void
+pgpa_output_query_feature(pgpa_output_context *context, pgpa_qf_type type,
+                                                 List *query_features)
+{
+       StringInfo      out = context->buf;
+       bool            need_separator = false;
+
+       if (query_features == NIL)
+               return;
+
+       if (out->len > 0)
+               appendStringInfoChar(out, '\n');
+       appendStringInfo(out, "%s(", pgpa_cstring_query_feature_type(type));
+
+       foreach_ptr(pgpa_query_feature, qf, query_features)
+       {
+               bool            multi = (bms_membership(qf->relids) != BMS_SINGLETON);
+
+               /* Targets after the first get a wrap point and a space. */
+               if (need_separator)
+               {
+                       pgpa_maybe_linebreak(out, context->wrap_column);
+                       appendStringInfoChar(out, ' ');
+               }
+               need_separator = true;
+
+               /* A target naming several relations is parenthesized. */
+               if (multi)
+                       appendStringInfoChar(out, '(');
+               pgpa_output_relations(context, out, qf->relids);
+               if (multi)
+                       appendStringInfoChar(out, ')');
+       }
+
+       appendStringInfoChar(out, ')');
+       pgpa_maybe_linebreak(out, context->wrap_column);
+}
+
+/*
+ * Emit "simple" advice for a List of Bitmapsets, each holding one or more
+ * RTIs.
+ *
+ * "Simple" refers to the shape of the output: the strategy name, then a
+ * parenthesized, space-separated list of items. A Bitmapset with exactly
+ * one member is written as a bare relation identifier; one with several
+ * members is written as a parenthesized, space-separated sub-list.
+ *
+ * An empty Bitmapset isn't expected here, but would be rendered as an
+ * empty sub-list, since it takes the same path as a multi-member set.
+ * An empty List produces no output at all.
+ */
+static void
+pgpa_output_simple_strategy(pgpa_output_context *context, char *strategy,
+                                                       List *relid_sets)
+{
+       StringInfo      out = context->buf;
+       bool            need_separator = false;
+
+       if (relid_sets == NIL)
+               return;
+
+       if (out->len > 0)
+               appendStringInfoChar(out, '\n');
+       appendStringInfo(out, "%s(", strategy);
+
+       foreach_node(Bitmapset, relids, relid_sets)
+       {
+               bool            multi = (bms_membership(relids) != BMS_SINGLETON);
+
+               /* Items after the first get a wrap point and a space. */
+               if (need_separator)
+               {
+                       pgpa_maybe_linebreak(out, context->wrap_column);
+                       appendStringInfoChar(out, ' ');
+               }
+               need_separator = true;
+
+               if (multi)
+                       appendStringInfoChar(out, '(');
+               pgpa_output_relations(context, out, relids);
+               if (multi)
+                       appendStringInfoChar(out, ')');
+       }
+
+       appendStringInfoChar(out, ')');
+       pgpa_maybe_linebreak(out, context->wrap_column);
+}
+
+/* Emit NO_GATHER() advice for the given relations, if there are any. */
+static void
+pgpa_output_no_gather(pgpa_output_context *context, Bitmapset *relids)
+{
+       StringInfo      out = context->buf;
+
+       if (relids == NULL)
+               return;
+       /* These relations appear beneath no Gather or Gather Merge node. */
+       if (out->len > 0)
+               appendStringInfoChar(out, '\n');
+       appendStringInfoString(out, "NO_GATHER(");
+       pgpa_output_relations(context, out, relids);
+       appendStringInfoChar(out, ')');
+}
+
+/*
+ * Emit the identifier of every RTI in the given set, in ascending RTI order.
+ *
+ * Identifiers are separated by single spaces, and the gap before each
+ * identifier other than the first is a possible line break position.
+ */
+static void
+pgpa_output_relations(pgpa_output_context *context, StringInfo buf,
+                                         Bitmapset *relids)
+{
+       int                     nprinted = 0;
+
+       for (int rti = bms_next_member(relids, -1);
+                rti >= 0;
+                rti = bms_next_member(relids, rti))
+       {
+               /* RTIs are 1-based, but the string array is 0-based. */
+               const char *rid_string = context->rid_strings[rti - 1];
+
+               if (rid_string == NULL)
+                       elog(ERROR, "no identifier for RTI %d", rti);
+
+               /* Only identifiers after the first follow a wrap point. */
+               if (nprinted++ > 0)
+               {
+                       pgpa_maybe_linebreak(buf, context->wrap_column);
+                       appendStringInfo(buf, " %s", rid_string);
+               }
+               else
+                       appendStringInfoString(buf, rid_string);
+       }
+}
+
+/*
+ * Return the advice tag name, as a constant string, for a join strategy.
+ */
+static char *
+pgpa_cstring_join_strategy(pgpa_join_strategy strategy)
+{
+       switch (strategy)
+       {
+               case JSTRAT_MERGE_JOIN_PLAIN:
+                       return "MERGE_JOIN_PLAIN";
+               case JSTRAT_MERGE_JOIN_MATERIALIZE:
+                       return "MERGE_JOIN_MATERIALIZE";
+               case JSTRAT_NESTED_LOOP_PLAIN:
+                       return "NESTED_LOOP_PLAIN";
+               case JSTRAT_NESTED_LOOP_MATERIALIZE:
+                       return "NESTED_LOOP_MATERIALIZE";
+               case JSTRAT_NESTED_LOOP_MEMOIZE:
+                       return "NESTED_LOOP_MEMOIZE";
+               case JSTRAT_HASH_JOIN:
+                       return "HASH_JOIN";
+       }
+
+       pg_unreachable();                       /* assumes the switch lists every value */
+       return NULL;                            /* not reached */
+}
+
+/*
+ * Return the advice tag name, as a constant string, for a scan strategy.
+ */
+static char *
+pgpa_cstring_scan_strategy(pgpa_scan_strategy strategy)
+{
+       switch (strategy)
+       {
+               case PGPA_SCAN_ORDINARY:
+                       return "ORDINARY_SCAN";         /* skipped by pgpa_output_advice */
+               case PGPA_SCAN_SEQ:
+                       return "SEQ_SCAN";
+               case PGPA_SCAN_BITMAP_HEAP:
+                       return "BITMAP_HEAP_SCAN";
+               case PGPA_SCAN_FOREIGN:
+                       return "FOREIGN_JOIN";          /* sic: the tag is FOREIGN_JOIN */
+               case PGPA_SCAN_INDEX:
+                       return "INDEX_SCAN";
+               case PGPA_SCAN_INDEX_ONLY:
+                       return "INDEX_ONLY_SCAN";
+               case PGPA_SCAN_PARTITIONWISE:
+                       return "PARTITIONWISE";
+               case PGPA_SCAN_TID:
+                       return "TID_SCAN";
+       }
+
+       pg_unreachable();                       /* assumes the switch lists every value */
+       return NULL;                            /* not reached */
+}
+
+/*
+ * Get a C string that corresponds to the specified query feature type.
+ */
+static char *
+pgpa_cstring_query_feature_type(pgpa_qf_type type)
+{
+       switch (type)
+       {
+               case PGPAQF_GATHER:
+                       return "GATHER";
+               case PGPAQF_GATHER_MERGE:
+                       return "GATHER_MERGE";
+               case PGPAQF_SEMIJOIN_NON_UNIQUE:
+                       return "SEMIJOIN_NON_UNIQUE";
+               case PGPAQF_SEMIJOIN_UNIQUE:
+                       return "SEMIJOIN_UNIQUE";
+       }
+
+
+       pg_unreachable();                       /* assumes the switch lists every value */
+       return NULL;                            /* not reached */
+}
+
+/*
+ * Insert a line break into the StringInfoData, if needed.
+ *
+ * If wrap_column is zero or negative, this does nothing. Otherwise, we
+ * consider inserting a newline. We only insert a newline if the length of
+ * the last line in the buffer exceeds wrap_column, and not if we'd be
+ * inserting a newline at or before the beginning of the current line.
+ *
+ * The position at which the newline is inserted is simply wherever the
+ * buffer ended the last time this function was called, which we remember
+ * in buf->cursor. In other words, the caller is expected to call this
+ * function every time we reach a good place for a line break.
+ */
+static void
+pgpa_maybe_linebreak(StringInfo buf, int wrap_column)
+{
+       char       *trailing_nl;
+       int                     line_start;
+       int                     save_cursor;
+
+       /* If line wrapping is disabled, exit quickly. */
+       if (wrap_column <= 0)
+               return;
+
+       /*
+        * Set line_start to the byte offset within buf->data of the first
+        * character of the current line, where the current line means the last
+        * one in the buffer. Note that line_start could be the offset of the
+        * trailing '\0' if the last character in the buffer is a line break.
+        */
+       trailing_nl = strrchr(buf->data, '\n');
+       if (trailing_nl == NULL)
+               line_start = 0;
+       else
+               line_start = (trailing_nl - buf->data) + 1;
+
+       /*
+        * Remember that the current end of the buffer is a potential location to
+        * insert a line break on a future call to this function.
+        */
+       save_cursor = buf->cursor;
+       buf->cursor = buf->len;
+
+       /* If we haven't passed the wrap column, we don't need a newline. */
+       if (buf->len - line_start <= wrap_column)
+               return;
+
+       /*
+        * The saved break position must lie within the current line; otherwise
+        * we'd add a newline at or before a line start that already exists.
+        */
+       if (save_cursor <= line_start)
+               return;
+
+       /* Insert a newline at the previous cursor location. */
+       enlargeStringInfo(buf, 1);
+       memmove(&buf->data[save_cursor] + 1, &buf->data[save_cursor],
+                       buf->len - save_cursor);
+       ++buf->cursor;
+       buf->data[++buf->len] = '\0';
+       buf->data[save_cursor] = '\n';
+}
diff --git a/contrib/pg_plan_advice/pgpa_output.h b/contrib/pg_plan_advice/pgpa_output.h
new file mode 100644 (file)
index 0000000..47496d7
--- /dev/null
@@ -0,0 +1,22 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_output.h
+ *       produce textual output from the results of a plan tree walk
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_output.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_OUTPUT_H
+#define PGPA_OUTPUT_H
+
+#include "pgpa_identifier.h"
+#include "pgpa_walker.h"
+
+extern void pgpa_output_advice(StringInfo buf,
+                                                          pgpa_plan_walker_context *walker,
+                                                          pgpa_identifier *rt_identifiers);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_parser.y b/contrib/pg_plan_advice/pgpa_parser.y
new file mode 100644 (file)
index 0000000..4617e7f
--- /dev/null
@@ -0,0 +1,337 @@
+%{
+/*
+ * Parser for plan advice
+ *
+ * Copyright (c) 2000-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_parser.y
+ */
+
+#include "postgres.h"
+
+#include <float.h>
+#include <math.h>
+
+#include "fmgr.h"
+#include "nodes/miscnodes.h"
+#include "utils/builtins.h"
+#include "utils/float.h"
+
+#include "pgpa_ast.h"
+#include "pgpa_parser.h"
+
+/*
+ * Bison doesn't allocate anything that needs to live across parser calls,
+ * so we can easily have it use palloc instead of malloc.  This prevents
+ * memory leaks if we error out during parsing.
+ */
+#define YYMALLOC palloc
+#define YYFREE   pfree
+%}
+
+/* BISON Declarations */
+%parse-param {List **result}
+%parse-param {char **parse_error_msg_p}
+%parse-param {yyscan_t yyscanner}
+%lex-param {List **result}
+%lex-param {char **parse_error_msg_p}
+%lex-param {yyscan_t yyscanner}
+%pure-parser
+%expect 0
+%name-prefix="pgpa_yy"
+
+%union
+{
+       char       *str;
+       int                     integer;
+       List       *list;
+       pgpa_advice_item *item;
+       pgpa_advice_target *target;
+       pgpa_index_target *itarget;
+}
+%token <str> TOK_IDENT TOK_TAG_JOIN_ORDER TOK_TAG_BITMAP TOK_TAG_INDEX
+%token <str> TOK_TAG_SIMPLE TOK_TAG_GENERIC
+%token <integer> TOK_INTEGER
+%token TOK_OR TOK_AND
+
+%type <integer> opt_ri_occurrence
+%type <item> advice_item
+%type <list> advice_item_list bitmap_sublist bitmap_target_list generic_target_list
+%type <list> index_target_list join_order_target_list
+%type <list> opt_partition simple_target_list
+%type <str> identifier opt_plan_name
+%type <target> generic_sublist join_order_sublist
+%type <target> relation_identifier
+%type <itarget> bitmap_target_item index_name
+
+%start parse_toplevel
+
+/* Grammar follows */
+%%
+
+parse_toplevel: advice_item_list
+               {
+                       (void) yynerrs;                         /* suppress compiler warning */
+                       *result = $1;                           /* List of advice items for caller */
+               }
+       ;
+
+advice_item_list: advice_item_list advice_item
+               { $$ = lappend($1, $2); }       /* items are kept in input order */
+       |
+               { $$ = NIL; }                           /* an empty advice string is accepted */
+       ;
+
+advice_item: TOK_TAG_JOIN_ORDER '(' join_order_target_list ')'
+               {
+                       $$ = palloc0_object(pgpa_advice_item);
+                       $$->tag = PGPA_TAG_JOIN_ORDER;
+                       $$->targets = $3;
+               }
+       | TOK_TAG_INDEX '(' index_target_list ')'
+               {
+                       $$ = palloc0_object(pgpa_advice_item);
+                       if (strcmp($1, "index_only_scan") == 0)         /* one token, two tags */
+                               $$->tag = PGPA_TAG_INDEX_ONLY_SCAN;
+                       else if (strcmp($1, "index_scan") == 0)
+                               $$->tag = PGPA_TAG_INDEX_SCAN;
+                       else
+                               elog(ERROR, "tag parsing failed: %s", $1);
+                       $$->targets = $3;
+               }
+       | TOK_TAG_BITMAP '(' bitmap_target_list ')'
+               {
+                       $$ = palloc0_object(pgpa_advice_item);
+                       $$->tag = PGPA_TAG_BITMAP_HEAP_SCAN;
+                       $$->targets = $3;
+               }
+       | TOK_TAG_SIMPLE '(' simple_target_list ')'
+               {
+                       $$ = palloc0_object(pgpa_advice_item);
+                       if (strcmp($1, "no_gather") == 0)                       /* one token, three tags */
+                               $$->tag = PGPA_TAG_NO_GATHER;
+                       else if (strcmp($1, "seq_scan") == 0)
+                               $$->tag = PGPA_TAG_SEQ_SCAN;
+                       else if (strcmp($1, "tid_scan") == 0)
+                               $$->tag = PGPA_TAG_TID_SCAN;
+                       else
+                               elog(ERROR, "tag parsing failed: %s", $1);
+                       $$->targets = $3;
+               }
+       | TOK_TAG_GENERIC '(' generic_target_list ')'
+               {
+                       bool    fail;
+
+                       $$ = palloc0_object(pgpa_advice_item);
+                       $$->tag = pgpa_parse_advice_tag($1, &fail);
+                       if (fail)
+                               pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+                                                        "unrecognized advice tag");
+
+                       /* Every FOREIGN_JOIN target must be a sublist with at least */
+                       /* two members; a bare identifier, "()" or "(x)" is an error. */
+                       if ($$->tag == PGPA_TAG_FOREIGN_JOIN)
+                       {
+                               foreach_ptr(pgpa_advice_target, target, $3)
+                               {
+                                       if (target->ttype == PGPA_TARGET_IDENTIFIER ||
+                                               list_length(target->children) < 2)
+                                                       pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+                                                                                "FOREIGN_JOIN targets must contain more than one relation identifier");
+                               }
+                       }
+
+                       $$->targets = $3;
+               }
+       ;
+
+relation_identifier: identifier opt_ri_occurrence opt_partition opt_plan_name
+               {
+                       $$ = palloc0_object(pgpa_advice_target);
+                       $$->ttype = PGPA_TARGET_IDENTIFIER;
+                       $$->rid.alias_name = $1;
+                       $$->rid.occurrence = $2;
+                       if (list_length($3) == 2)               /* "/schema.partition" */
+                       {
+                               $$->rid.partnsp = linitial($3);
+                               $$->rid.partrel = lsecond($3);
+                       }
+                       else if ($3 != NIL)                             /* "/partition", no schema */
+                               $$->rid.partrel = linitial($3);
+                       $$->rid.plan_name = $4;                 /* NULL when no "@name" given */
+               }
+       ;
+
+index_name: identifier
+               {
+                       $$ = palloc0_object(pgpa_index_target);
+                       $$->itype = PGPA_INDEX_NAME;
+                       $$->indname = $1;                               /* indnamespace is left zeroed */
+               }
+       | identifier '.' identifier
+               {
+                       $$ = palloc0_object(pgpa_index_target);
+                       $$->itype = PGPA_INDEX_NAME;
+                       $$->indnamespace = $1;                  /* schema-qualified form */
+                       $$->indname = $3;
+               }
+       ;
+
+opt_ri_occurrence:
+       '#' TOK_INTEGER
+               {
+                       if ($2 <= 0)                                    /* "#0" and negatives are rejected */
+                               pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+                                                        "only positive occurrence numbers are permitted");
+                       $$ = $2;
+               }
+       |
+               {
+                       /* The default occurrence number is 1. */
+                       $$ = 1;
+               }
+       ;
+
+identifier: TOK_IDENT                          /* default action: $$ = $1 */
+       | TOK_TAG_JOIN_ORDER                    /* tag tokens are also valid identifiers */
+       | TOK_TAG_INDEX
+       | TOK_TAG_BITMAP
+       | TOK_TAG_SIMPLE
+       | TOK_TAG_GENERIC
+       ;
+
+/*
+ * When generating advice, we always schema-qualify the partition name, but
+ * when parsing advice, we accept a specification that lacks one.
+ *
+ * The result is a two-element list (schema, partition name), a one-element
+ * list (partition name), or NIL if no partition was specified.
+ *
+ * NOTE(review): the names here are TOK_IDENT rather than identifier, so a
+ * name that lexes as one of the tag keyword tokens accepted by identifier is
+ * not accepted here -- confirm that this is intended.
+ */
+opt_partition:
+       '/' TOK_IDENT '.' TOK_IDENT
+               { $$ = list_make2($2, $4); }
+       | '/' TOK_IDENT
+               { $$ = list_make1($2); }
+       |
+               { $$ = NIL; }
+       ;
+
+/*
+ * Optional plan name, written as '@' followed by an identifier token. The
+ * result is NULL if no plan name was specified.
+ */
+opt_plan_name:
+       '@' TOK_IDENT
+               { $$ = $2; }
+       |
+               { $$ = NULL; }
+       ;
+
+/*
+ * Zero or more (relation identifier, bitmap target item) pairs. Each bitmap
+ * target item is attached to its relation identifier as the itarget, and the
+ * relation identifier is what gets appended to the list.
+ */
+bitmap_target_list: bitmap_target_list relation_identifier bitmap_target_item
+               {
+                       $2->itarget = $3;
+                       $$ = lappend($1, $2);
+               }
+       |
+               { $$ = NIL; }
+       ;
+
+/*
+ * A bitmap target item is either a single index name or a TOK_OR / TOK_AND
+ * group whose parenthesized children are themselves bitmap target items, so
+ * groups can nest.
+ */
+bitmap_target_item: index_name
+               { $$ = $1; }
+       | TOK_OR '(' bitmap_sublist ')'
+               {
+                       $$ = palloc0_object(pgpa_index_target);
+                       $$->itype = PGPA_INDEX_OR;
+                       $$->children = $3;
+               }
+       | TOK_AND '(' bitmap_sublist ')'
+               {
+                       $$ = palloc0_object(pgpa_index_target);
+                       $$->itype = PGPA_INDEX_AND;
+                       $$->children = $3;
+               }
+       ;
+
+/* One or more bitmap target items; unlike the other lists, never empty. */
+bitmap_sublist: bitmap_sublist bitmap_target_item
+               { $$ = lappend($1, $2); }
+       | bitmap_target_item
+               { $$ = list_make1($1); }
+       ;
+
+/*
+ * Zero or more targets, each of which is either a relation identifier or a
+ * parenthesized sublist.
+ */
+generic_target_list: generic_target_list relation_identifier
+               { $$ = lappend($1, $2); }
+       | generic_target_list generic_sublist
+               { $$ = lappend($1, $2); }
+       |
+               { $$ = NIL; }
+       ;
+
+/*
+ * A parenthesized generic target list becomes a single target of type
+ * PGPA_TARGET_ORDERED_LIST whose children are the enclosed targets.
+ */
+generic_sublist: '(' generic_target_list ')'
+               {
+                       $$ = palloc0_object(pgpa_advice_target);
+                       $$->ttype = PGPA_TARGET_ORDERED_LIST;
+                       $$->children = $2;
+               }
+       ;
+
+/*
+ * Zero or more (relation identifier, index name) pairs. Each index name is
+ * attached to its relation identifier as the itarget, and the relation
+ * identifier is what gets appended to the list.
+ */
+index_target_list:
+         index_target_list relation_identifier index_name
+               {
+                       $2->itarget = $3;
+                       $$ = lappend($1, $2);
+               }
+       |
+               { $$ = NIL; }
+       ;
+
+/*
+ * Zero or more join order targets, each of which is either a relation
+ * identifier or a join order sublist.
+ */
+join_order_target_list: join_order_target_list relation_identifier
+               { $$ = lappend($1, $2); }
+       | join_order_target_list join_order_sublist
+               { $$ = lappend($1, $2); }
+       |
+               { $$ = NIL; }
+       ;
+
+/*
+ * Parentheses produce a PGPA_TARGET_ORDERED_LIST whose contents may include
+ * further sublists; braces produce a PGPA_TARGET_UNORDERED_LIST whose
+ * contents are limited to relation identifiers (see simple_target_list).
+ */
+join_order_sublist:
+       '(' join_order_target_list ')'
+               {
+                       $$ = palloc0_object(pgpa_advice_target);
+                       $$->ttype = PGPA_TARGET_ORDERED_LIST;
+                       $$->children = $2;
+               }
+       | '{' simple_target_list '}'
+               {
+                       $$ = palloc0_object(pgpa_advice_target);
+                       $$->ttype = PGPA_TARGET_UNORDERED_LIST;
+                       $$->children = $2;
+               }
+       ;
+
+/* Zero or more relation identifiers; sublists are not permitted here. */
+simple_target_list: simple_target_list relation_identifier
+               { $$ = lappend($1, $2); }
+       |
+               { $$ = NIL; }
+       ;
+
+%%
+
+/*
+ * Parse an advice_string and return the resulting list of pgpa_advice_item
+ * objects.
+ *
+ * If an error was reported while parsing, *error_p is set to the error
+ * message and NULL is returned. Otherwise, *error_p is set to NULL and the
+ * parse result is returned.
+ *
+ * Callers should test *error_p, not the return value, to detect failure: a
+ * successful parse of a string containing no actual advice can also yield an
+ * empty list (NIL).
+ */
+List *
+pgpa_parse(const char *advice_string, char **error_p)
+{
+       yyscan_t        scanner;
+       List       *result;
+       char       *error = NULL;
+
+       /*
+        * Run the parser over the string. The return value of pgpa_yyparse() is
+        * not examined; failure is detected solely by whether 'error' was set.
+        */
+       pgpa_scanner_init(advice_string, &scanner);
+       pgpa_yyparse(&result, &error, scanner);
+       pgpa_scanner_finish(scanner);
+
+       if (error != NULL)
+       {
+               *error_p = error;
+               return NULL;
+       }
+
+       *error_p = NULL;
+       return result;
+}
diff --git a/contrib/pg_plan_advice/pgpa_planner.c b/contrib/pg_plan_advice/pgpa_planner.c
new file mode 100644 (file)
index 0000000..767facc
--- /dev/null
@@ -0,0 +1,1706 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_planner.c
+ *       planner hooks
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_planner.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pg_plan_advice.h"
+#include "pgpa_collector.h"
+#include "pgpa_identifier.h"
+#include "pgpa_output.h"
+#include "pgpa_planner.h"
+#include "pgpa_trove.h"
+#include "pgpa_walker.h"
+
+#include "common/hashfn_unstable.h"
+#include "nodes/makefuncs.h"
+#include "optimizer/extendplan.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/plancat.h"
+#include "optimizer/planner.h"
+#include "parser/parsetree.h"
+#include "utils/lsyscache.h"
+
+#ifdef USE_ASSERT_CHECKING
+
+/*
+ * When assertions are enabled, we try generating relation identifiers during
+ * planning, saving them in a hash table, and then cross-checking them against
+ * the ones generated after planning is complete.
+ */
+typedef struct pgpa_ri_checker_key
+{
+       /* Compared by string contents; a NULL name matches only another NULL. */
+       char       *plan_name;
+       /* presumably a range table index -- TODO confirm */
+       Index           rti;
+} pgpa_ri_checker_key;
+
+typedef struct pgpa_ri_checker
+{
+       /* Hash key; named by SH_KEY in the simplehash instantiation below. */
+       pgpa_ri_checker_key key;
+       /* NOTE(review): presumably the entry status simplehash.h needs -- confirm */
+       uint32          status;
+       /* presumably the identifier string saved during planning -- confirm */
+       const char *rid_string;
+} pgpa_ri_checker;
+
+static uint32 pgpa_ri_checker_hash_key(pgpa_ri_checker_key key);
+
+/*
+ * Equality test for hash keys: two keys are equal if their rti values match
+ * and their plan names are either both NULL or equal as strings.
+ */
+static inline bool
+pgpa_ri_checker_compare_key(pgpa_ri_checker_key a, pgpa_ri_checker_key b)
+{
+       /* Keys with different rti values can never match. */
+       if (a.rti != b.rti)
+               return false;
+
+       /* If either plan name is missing, both must be missing. */
+       if (a.plan_name == NULL || b.plan_name == NULL)
+               return (a.plan_name == b.plan_name);
+
+       /* Both plan names are present, so compare their contents. */
+       return strcmp(a.plan_name, b.plan_name) == 0;
+}
+
+/*
+ * Instantiate a simplehash table of pgpa_ri_checker entries keyed by
+ * pgpa_ri_checker_key. Generated names carry the pgpa_ri_check prefix, e.g.
+ * the pgpa_ri_check_hash type and pgpa_ri_check_create().
+ */
+#define SH_PREFIX                      pgpa_ri_check
+#define SH_ELEMENT_TYPE                pgpa_ri_checker
+#define SH_KEY_TYPE                    pgpa_ri_checker_key
+#define SH_KEY                         key
+#define SH_HASH_KEY(tb, key)   pgpa_ri_checker_hash_key(key)
+#define        SH_EQUAL(tb, a, b)      pgpa_ri_checker_compare_key(a, b)
+#define SH_SCOPE                       static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+#endif
+
+/*
+ * Per-query private state, created by pgpa_planner_setup() and attached to
+ * the PlannerGlobal under planner_extension_id.
+ */
+typedef struct pgpa_planner_state
+{
+       /* ExplainState handed to pgpa_planner_setup(); may be NULL. */
+       ExplainState *explain_state;
+       /* Trove built from the supplied advice, or NULL if there is none. */
+       pgpa_trove *trove;
+       /* NOTE(review): never assigned in the code reviewed here -- confirm use */
+       MemoryContext trove_cxt;
+
+#ifdef USE_ASSERT_CHECKING
+       /* Hash table used for relation identifier cross-checking. */
+       pgpa_ri_check_hash *ri_check_hash;
+#endif
+} pgpa_planner_state;
+
+/*
+ * Per-joinrel private state, built by pgpa_get_join_state() and attached to
+ * the joinrel's RelOptInfo under planner_extension_id. An instance whose
+ * join_indexes and rel_indexes are both NULL serves as a negative cache
+ * entry, meaning that no relevant advice exists for the joinrel.
+ */
+typedef struct pgpa_join_state
+{
+       /* Most-recently-considered outer rel. */
+       RelOptInfo *outerrel;
+
+       /* Most-recently-considered inner rel. */
+       RelOptInfo *innerrel;
+
+       /*
+        * Array of relation identifiers for all members of this joinrel, with
+        * outerrel identifiers before innerrel identifiers.
+        */
+       pgpa_identifier *rids;
+
+       /* Number of outer rel identifiers. */
+       int                     outer_count;
+
+       /* Number of inner rel identifiers. */
+       int                     inner_count;
+
+       /*
+        * Trove lookup results.
+        *
+        * join_entries and rel_entries are arrays of entries, and join_indexes
+        * and rel_indexes are the integer offsets within those arrays of entries
+        * potentially relevant to us. The "join" fields correspond to a lookup
+        * using PGPA_TROVE_LOOKUP_JOIN and the "rel" fields to a lookup using
+        * PGPA_TROVE_LOOKUP_REL.
+        */
+       pgpa_trove_entry *join_entries;
+       Bitmapset  *join_indexes;
+       pgpa_trove_entry *rel_entries;
+       Bitmapset  *rel_indexes;
+} pgpa_join_state;
+
+/* Saved hook values */
+static get_relation_info_hook_type prev_get_relation_info = NULL;
+static join_path_setup_hook_type prev_join_path_setup = NULL;
+static joinrel_setup_hook_type prev_joinrel_setup = NULL;
+static planner_setup_hook_type prev_planner_setup = NULL;
+static planner_shutdown_hook_type prev_planner_shutdown = NULL;
+
+/* Other global variables */
+static int     planner_extension_id = -1;
+
+/* Function prototypes. */
+static void pgpa_get_relation_info(PlannerInfo *root,
+                                                                  Oid relationObjectId,
+                                                                  bool inhparent,
+                                                                  RelOptInfo *rel);
+static void pgpa_joinrel_setup(PlannerInfo *root,
+                                                          RelOptInfo *joinrel,
+                                                          RelOptInfo *outerrel,
+                                                          RelOptInfo *innerrel,
+                                                          SpecialJoinInfo *sjinfo,
+                                                          List *restrictlist);
+static void pgpa_join_path_setup(PlannerInfo *root,
+                                                                RelOptInfo *joinrel,
+                                                                RelOptInfo *outerrel,
+                                                                RelOptInfo *innerrel,
+                                                                JoinType jointype,
+                                                                JoinPathExtraData *extra);
+static void pgpa_planner_setup(PlannerGlobal *glob, Query *parse,
+                                                          const char *query_string,
+                                                          double *tuple_fraction,
+                                                          ExplainState *es);
+static void pgpa_planner_shutdown(PlannerGlobal *glob, Query *parse,
+                                                                 const char *query_string, PlannedStmt *pstmt);
+static void pgpa_planner_apply_joinrel_advice(uint64 *pgs_mask_p,
+                                                                                         char *plan_name,
+                                                                                         pgpa_join_state *pjs);
+static void pgpa_planner_apply_join_path_advice(JoinType jointype,
+                                                                                               uint64 *pgs_mask_p,
+                                                                                               char *plan_name,
+                                                                                               pgpa_join_state *pjs);
+static void pgpa_planner_apply_scan_advice(RelOptInfo *rel,
+                                                                                  pgpa_trove_entry *scan_entries,
+                                                                                  Bitmapset *scan_indexes,
+                                                                                  pgpa_trove_entry *rel_entries,
+                                                                                  Bitmapset *rel_indexes);
+static uint64 pgpa_join_strategy_mask_from_advice_tag(pgpa_advice_tag_type tag);
+static bool pgpa_join_order_permits_join(int outer_count, int inner_count,
+                                                                                pgpa_identifier *rids,
+                                                                                pgpa_trove_entry *entry);
+static bool pgpa_join_method_permits_join(int outer_count, int inner_count,
+                                                                                 pgpa_identifier *rids,
+                                                                                 pgpa_trove_entry *entry,
+                                                                                 bool *restrict_method);
+static bool pgpa_opaque_join_permits_join(int outer_count, int inner_count,
+                                                                                 pgpa_identifier *rids,
+                                                                                 pgpa_trove_entry *entry,
+                                                                                 bool *restrict_method);
+
+static List *pgpa_planner_append_feedback(List *list, pgpa_trove *trove,
+                                                                                 pgpa_trove_lookup_type type,
+                                                                                 pgpa_identifier *rt_identifiers,
+                                                                                 pgpa_plan_walker_context *walker);
+
+static inline void pgpa_ri_checker_save(pgpa_planner_state *pps,
+                                                                               PlannerInfo *root,
+                                                                               RelOptInfo *rel);
+static void pgpa_ri_checker_validate(pgpa_planner_state *pps,
+                                                                        PlannedStmt *pstmt);
+
+/*
+ * Register this module with the planner: obtain a planner extension ID, then
+ * put each of our hook functions in place, remembering whatever hook value
+ * was installed before.
+ */
+void
+pgpa_planner_install_hooks(void)
+{
+       /* This ID is how we find our private state in planner structures. */
+       planner_extension_id = GetPlannerExtensionId("pg_plan_advice");
+
+       /* Remember the previously-installed hook values. */
+       prev_get_relation_info = get_relation_info_hook;
+       prev_joinrel_setup = joinrel_setup_hook;
+       prev_join_path_setup = join_path_setup_hook;
+       prev_planner_setup = planner_setup_hook;
+       prev_planner_shutdown = planner_shutdown_hook;
+
+       /* Now install our own hook functions. */
+       get_relation_info_hook = pgpa_get_relation_info;
+       joinrel_setup_hook = pgpa_joinrel_setup;
+       join_path_setup_hook = pgpa_join_path_setup;
+       planner_setup_hook = pgpa_planner_setup;
+       planner_shutdown_hook = pgpa_planner_shutdown;
+}
+
+/*
+ * Hook function for get_relation_info().
+ *
+ * This is the point at which scan advice gets applied. It also gives us a
+ * chance to save range-table identifier details for cross-checking in
+ * assert-enabled builds.
+ *
+ * XXX: We currently emit useless advice like NO_GATHER("*RESULT*") for trivial
+ * queries. The advice is useless because get_relation_info isn't called for
+ * non-relation RTEs. We should either suppress the advice in such cases, or
+ * add a hook that can apply it.
+ */
+static void
+pgpa_get_relation_info(PlannerInfo *root, Oid relationObjectId,
+                                          bool inhparent, RelOptInfo *rel)
+{
+       /* Our private state, if pgpa_planner_setup() created any. */
+       pgpa_planner_state *pps =
+               GetPlannerGlobalExtensionState(root->glob, planner_extension_id);
+
+       if (pps != NULL)
+       {
+               /* Save details needed for range table identifier cross-checking. */
+               pgpa_ri_checker_save(pps, root, rel);
+
+               /* Advice is only relevant if a trove was built for this query. */
+               if (pps->trove != NULL)
+               {
+                       pgpa_identifier rid;
+                       pgpa_trove_result scan_result;
+                       pgpa_trove_result rel_result;
+
+                       /* Look up both scan advice and general rel advice. */
+                       pgpa_compute_identifier_by_rti(root, rel->relid, &rid);
+                       pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_SCAN, 1, &rid,
+                                                         &scan_result);
+                       pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_REL, 1, &rid,
+                                                         &rel_result);
+
+                       /* Apply whatever either lookup turned up. */
+                       if (scan_result.indexes != NULL || rel_result.indexes != NULL)
+                               pgpa_planner_apply_scan_advice(rel,
+                                                                                          scan_result.entries,
+                                                                                          scan_result.indexes,
+                                                                                          rel_result.entries,
+                                                                                          rel_result.indexes);
+               }
+       }
+
+       /* Give any previously-installed hook its turn. */
+       if (prev_get_relation_info)
+               (*prev_get_relation_info) (root, relationObjectId, inhparent, rel);
+}
+
+/*
+ * Search for advice pertaining to a proposed join.
+ *
+ * Returns NULL if no advice trove exists for this query, or if the trove
+ * lookups for this joinrel found no potentially relevant entries. Otherwise,
+ * returns the pgpa_join_state attached to the joinrel, with its identifier
+ * array and counts reflecting the given outerrel and innerrel.
+ */
+static pgpa_join_state *
+pgpa_get_join_state(PlannerInfo *root, RelOptInfo *joinrel,
+                                       RelOptInfo *outerrel, RelOptInfo *innerrel)
+{
+       pgpa_planner_state *pps;
+       pgpa_join_state *pjs;
+       bool            new_pjs = false;
+
+       /* Fetch our private state, set up by pgpa_planner_setup(). */
+       pps = GetPlannerGlobalExtensionState(root->glob, planner_extension_id);
+       if (pps == NULL || pps->trove == NULL)
+       {
+               /* No advice applies to this query, hence none to this joinrel. */
+               return NULL;
+       }
+
+       /*
+        * See whether we've previously associated a pgpa_join_state with this
+        * joinrel. If we have not, we need to try to construct one. If we have,
+        * then there are two cases: (a) if innerrel and outerrel are unchanged,
+        * we can simply use it, and (b) if they have changed, we need to rejigger
+        * the array of identifiers but can still skip the trove lookup.
+        */
+       pjs = GetRelOptInfoExtensionState(joinrel, planner_extension_id);
+       if (pjs != NULL)
+       {
+               if (pjs->join_indexes == NULL && pjs->rel_indexes == NULL)
+               {
+                       /*
+                        * If there's no potentially relevant advice, then the presence of
+                        * this pgpa_join_state acts like a negative cache entry: it tells
+                        * us not to bother searching the trove for advice, because we
+                        * will not find any.
+                        */
+                       return NULL;
+               }
+
+               if (pjs->outerrel == outerrel && pjs->innerrel == innerrel)
+               {
+                       /* No updates required, so just return. */
+                       /* XXX. Does this need to do something different under GEQO? */
+                       return pjs;
+               }
+       }
+
+       /*
+        * If there's no pgpa_join_state yet, we need to allocate one. Trove keys
+        * will not get built for RTE_JOIN RTEs, so the array may end up being
+        * larger than needed. It's not worth trying to compute a perfectly
+        * accurate count here.
+        */
+       if (pjs == NULL)
+       {
+               int                     pessimistic_count = bms_num_members(joinrel->relids);
+
+               pjs = palloc0_object(pgpa_join_state);
+               pjs->rids = palloc_array(pgpa_identifier, pessimistic_count);
+               new_pjs = true;
+       }
+
+       /*
+        * Either we just allocated a new pgpa_join_state, or the existing one
+        * needs reconfiguring for a new innerrel and outerrel. The required array
+        * size can't change, so we can overwrite the existing one.
+        *
+        * Outer identifiers are written at the start of the array and inner
+        * identifiers immediately after them.
+        */
+       pjs->outerrel = outerrel;
+       pjs->innerrel = innerrel;
+       pjs->outer_count =
+               pgpa_compute_identifiers_by_relids(root, outerrel->relids, pjs->rids);
+       pjs->inner_count =
+               pgpa_compute_identifiers_by_relids(root, innerrel->relids,
+                                                                                  pjs->rids + pjs->outer_count);
+
+       /*
+        * If we allocated a new pgpa_join_state, search our trove of advice for
+        * relevant entries. The trove lookup will return the same results for
+        * every outerrel/innerrel combination, so we don't need to repeat that
+        * work every time.
+        */
+       if (new_pjs)
+       {
+               pgpa_trove_result tresult;
+
+               /* Find join entries. */
+               pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_JOIN,
+                                                 pjs->outer_count + pjs->inner_count,
+                                                 pjs->rids, &tresult);
+               pjs->join_entries = tresult.entries;
+               pjs->join_indexes = tresult.indexes;
+
+               /* Find rel entries. */
+               pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_REL,
+                                                 pjs->outer_count + pjs->inner_count,
+                                                 pjs->rids, &tresult);
+               pjs->rel_entries = tresult.entries;
+               pjs->rel_indexes = tresult.indexes;
+
+               /* Now that the new pgpa_join_state is fully valid, save a pointer. */
+               SetRelOptInfoExtensionState(joinrel, planner_extension_id, pjs);
+
+               /*
+                * If there was no relevant advice found, just return NULL. This
+                * pgpa_join_state will stick around as a sort of negative cache
+                * entry, so that future calls for this same joinrel quickly return
+                * NULL.
+                */
+               if (pjs->join_indexes == NULL && pjs->rel_indexes == NULL)
+                       return NULL;
+       }
+
+       return pjs;
+}
+
+/*
+ * Hook function for joinrel setup: apply advice that constrains this join
+ * relation as a whole, independent of how the join is implemented.
+ *
+ * The outerrel and innerrel we receive are merely the pair that happened to
+ * trigger creation of this joinrel, so they should not really affect which
+ * advice is applied.
+ */
+static void
+pgpa_joinrel_setup(PlannerInfo *root, RelOptInfo *joinrel,
+                                  RelOptInfo *outerrel, RelOptInfo *innerrel,
+                                  SpecialJoinInfo *sjinfo, List *restrictlist)
+{
+       pgpa_join_state *join_state;
+
+       Assert(bms_membership(joinrel->relids) == BMS_MULTIPLE);
+
+       /* Look up our private per-joinrel state; NULL means nothing to apply. */
+       join_state = pgpa_get_join_state(root, joinrel, outerrel, innerrel);
+       if (join_state != NULL)
+       {
+               uint64     *mask_p = &joinrel->pgs_mask;
+
+               pgpa_planner_apply_joinrel_advice(mask_p, root->plan_name,
+                                                                                 join_state);
+       }
+
+       /* Give any previously-installed hook its turn. */
+       if (prev_joinrel_setup)
+               (*prev_joinrel_setup) (root, joinrel, outerrel, innerrel,
+                                                          sjinfo, restrictlist);
+}
+
+/*
+ * Hook function for join path setup: apply advice that is specific to this
+ * particular way of implementing this particular join, i.e. this choice of
+ * outerrel, innerrel, and join type.
+ */
+static void
+pgpa_join_path_setup(PlannerInfo *root, RelOptInfo *joinrel,
+                                        RelOptInfo *outerrel, RelOptInfo *innerrel,
+                                        JoinType jointype, JoinPathExtraData *extra)
+{
+       pgpa_join_state *join_state;
+
+       Assert(bms_membership(joinrel->relids) == BMS_MULTIPLE);
+
+       /* Look up our private per-joinrel state; NULL means nothing to apply. */
+       join_state = pgpa_get_join_state(root, joinrel, outerrel, innerrel);
+       if (join_state != NULL)
+       {
+               uint64     *mask_p = &extra->pgs_mask;
+
+               pgpa_planner_apply_join_path_advice(jointype, mask_p,
+                                                                                       root->plan_name, join_state);
+       }
+
+       /* Give any previously-installed hook its turn. */
+       if (prev_join_path_setup)
+               (*prev_join_path_setup) (root, joinrel, outerrel, innerrel,
+                                                                jointype, extra);
+}
+
+/*
+ * Prepare advice for use by a query.
+ */
+static void
+pgpa_planner_setup(PlannerGlobal *glob, Query *parse, const char *query_string,
+                                  double *tuple_fraction, ExplainState *es)
+{
+       pgpa_trove *trove = NULL;
+       pgpa_planner_state *pps;
+       char       *error;
+       bool            needs_pps = false;
+
+       /*
+        * If any advice was provided, build a trove of advice for use during
+        * planning.
+        */
+       if (pg_plan_advice_advice != NULL && pg_plan_advice_advice[0] != '\0')
+       {
+               List       *advice_items;
+
+               /*
+                * Parsing shouldn't fail here, because we must have previously parsed
+                * successfully in pg_plan_advice_advice_check_hook, but if it does,
+                * emit a warning.
+                */
+               advice_items = pgpa_parse(pg_plan_advice_advice, &error);
+               if (error)
+                       elog(WARNING, "could not parse advice: %s", error);
+
+               /*
+                * It's possible that the advice string was non-empty but contained no
+                * actual advice, e.g. it was all whitespace.
+                */
+               if (advice_items != NIL)
+               {
+                       trove = pgpa_build_trove(advice_items);
+                       needs_pps = true;
+               }
+       }
+
+#ifdef USE_ASSERT_CHECKING
+
+       /*
+        * If asserts are enabled, always build a private state object for
+        * cross-checks.
+        */
+       needs_pps = true;
+#endif
+
+       /* Initialize and store private state, if required. */
+       if (needs_pps)
+       {
+               pps = palloc0_object(pgpa_planner_state);
+               pps->explain_state = es;
+               pps->trove = trove;
+#ifdef USE_ASSERT_CHECKING
+               pps->ri_check_hash =
+                       pgpa_ri_check_create(CurrentMemoryContext, 1024, NULL);
+#endif
+               SetPlannerGlobalExtensionState(glob, planner_extension_id, pps);
+       }
+}
+
+/*
+ * Carry out whatever work we want to do after planning is complete.
+ *
+ * Depending on configuration, this generates an advice string from the
+ * finished plan and sends it to the collector, and/or records feedback about
+ * the supplied advice for EXPLAIN to display. Whatever is produced is attached
+ * to pstmt->extension_state under the name "pg_plan_advice". Finally, the
+ * call is passed along to any previously-installed planner_shutdown_hook, as
+ * the other hook functions in this file do.
+ */
+static void
+pgpa_planner_shutdown(PlannerGlobal *glob, Query *parse,
+                                         const char *query_string, PlannedStmt *pstmt)
+{
+       pgpa_planner_state *pps;
+       pgpa_trove *trove = NULL;
+       ExplainState *es = NULL;
+       pgpa_plan_walker_context walker = {0};  /* placate compiler */
+       bool            do_advice_feedback;
+       bool            do_collect_advice;
+       List       *pgpa_items = NIL;
+       pgpa_identifier *rt_identifiers = NULL;
+
+       /* Fetch our private state, set up by pgpa_planner_setup(). */
+       pps = GetPlannerGlobalExtensionState(glob, planner_extension_id);
+       if (pps != NULL)
+       {
+               trove = pps->trove;
+               es = pps->explain_state;
+       }
+
+       /* If at least one collector is enabled, generate advice. */
+       do_collect_advice = (pg_plan_advice_local_collection_limit > 0 ||
+                                                pg_plan_advice_shared_collection_limit > 0);
+
+       /* If we applied advice, generate feedback. */
+       do_advice_feedback = (trove != NULL && es != NULL);
+
+       /* If either of the above apply, analyze the resulting PlannedStmt. */
+       if (do_collect_advice || do_advice_feedback)
+       {
+               pgpa_plan_walker(&walker, pstmt);
+               rt_identifiers = pgpa_create_identifiers_for_planned_stmt(pstmt);
+       }
+
+       /*
+        * If advice collection is enabled, put the advice in string form and send
+        * it to the collector.
+        */
+       if (do_collect_advice)
+       {
+               char       *advice_string;
+               StringInfoData buf;
+
+               /* Generate a textual advice string. */
+               initStringInfo(&buf);
+               pgpa_output_advice(&buf, &walker, rt_identifiers);
+               advice_string = buf.data;
+
+               /* If the advice string is empty, don't bother collecting it. */
+               if (advice_string[0] != '\0')
+                       pgpa_collect_advice(pstmt->queryId, query_string, advice_string);
+
+               /*
+                * If we've gone to the trouble of generating an advice string, and if
+                * we're inside EXPLAIN, save the string so we don't need to
+                * regenerate it.
+                */
+               if (es != NULL)
+                       pgpa_items = lappend(pgpa_items,
+                                                                makeDefElem("advice_string",
+                                                                                        (Node *) makeString(advice_string),
+                                                                                        -1));
+       }
+
+       /*
+        * If we are planning within EXPLAIN, make arrangements to allow EXPLAIN
+        * to tell the user what has happened with the provided advice.
+        *
+        * NB: If EXPLAIN is used on a prepared statement, planning will have
+        * already happened without recording these details. We could consider
+        * adding a GUC to cater to that scenario; or we could do this work all
+        * the time, but that seems like too much overhead.
+        */
+       if (do_advice_feedback)
+       {
+               List       *feedback = NIL;
+
+               /*
+                * Inject a Node-tree representation of all the trove-entry flags into
+                * the PlannedStmt.
+                */
+               feedback = pgpa_planner_append_feedback(feedback,
+                                                                                               trove,
+                                                                                               PGPA_TROVE_LOOKUP_SCAN,
+                                                                                               rt_identifiers, &walker);
+               feedback = pgpa_planner_append_feedback(feedback,
+                                                                                               trove,
+                                                                                               PGPA_TROVE_LOOKUP_JOIN,
+                                                                                               rt_identifiers, &walker);
+               feedback = pgpa_planner_append_feedback(feedback,
+                                                                                               trove,
+                                                                                               PGPA_TROVE_LOOKUP_REL,
+                                                                                               rt_identifiers, &walker);
+
+               pgpa_items = lappend(pgpa_items, makeDefElem("feedback",
+                                                                                                        (Node *) feedback,
+                                                                                                        -1));
+       }
+
+       /* Push whatever data we're saving into the PlannedStmt. */
+       if (pgpa_items != NIL)
+               pstmt->extension_state =
+                       lappend(pstmt->extension_state,
+                                       makeDefElem("pg_plan_advice", (Node *) pgpa_items, -1));
+
+       /*
+        * If assertions are enabled, cross-check the generated range table
+        * identifiers.
+        */
+       if (pps != NULL)
+               pgpa_ri_checker_validate(pps, pstmt);
+
+       /*
+        * Pass call to previous hook. Without this, any planner_shutdown_hook
+        * that was installed before ours would never run.
+        */
+       if (prev_planner_shutdown)
+               (*prev_planner_shutdown) (glob, parse, query_string, pstmt);
+}
+
+/*
+ * Apply advice that constrains a join relation as a whole, independent of
+ * which input ends up as the outer side and which as the inner side.
+ *
+ * Every entry selected by pjs->rel_indexes is expected to carry a NO_GATHER,
+ * GATHER, GATHER_MERGE, or PARTITIONWISE tag; anything else is reported via
+ * elog(ERROR). The combined, non-conflicting result is folded into
+ * *pgs_mask_p, and the examined entries are flagged as partially matched,
+ * fully matched, and/or conflicting through pgpa_trove_set_flags().
+ *
+ * plan_name is not used by this function.
+ */
+static void
+pgpa_planner_apply_joinrel_advice(uint64 *pgs_mask_p, char *plan_name,
+                                                                 pgpa_join_state *pjs)
+{
+       int                     idx = -1;
+       int                     nrels = pjs->outer_count + pjs->inner_count;
+       int                     te_flags;
+       uint64          gather_mask = 0;
+       bool            gather_conflict = false;
+       Bitmapset  *gather_exact = NULL;
+       Bitmapset  *gather_inexact = NULL;
+       int                     pw_outcome = 0; /* >0 yes, <0 no, 0 undecided */
+       bool            pw_conflict = false;
+       Bitmapset  *pw_exact = NULL;
+       Bitmapset  *pw_inexact = NULL;
+
+       /* Visit each piece of advice that might apply to this joinrel. */
+       while ((idx = bms_next_member(pjs->rel_indexes, idx)) >= 0)
+       {
+               pgpa_trove_entry *entry = &pjs->rel_entries[idx];
+               bool            exact = true;
+               uint64          want_gather = 0;
+               int                     want_pw = 0;
+
+               if (entry->tag == PGPA_TAG_NO_GATHER)
+               {
+                       /*
+                        * NO_GATHER names a single relation, which must be part of
+                        * this joinrel, so Gather and Gather Merge are simply off the
+                        * table here. This always counts as a full match.
+                        */
+                       want_gather = PGS_CONSIDER_NONPARTIAL;
+               }
+               else
+               {
+                       pgpa_itm_type itm;
+
+                       itm = pgpa_identifiers_match_target(nrels, pjs->rids,
+                                                                                               entry->target);
+                       Assert(itm != PGPA_ITM_DISJOINT);
+                       exact = (itm == PGPA_ITM_EQUAL);
+
+                       /*
+                        * When the advice target is exactly this joinrel, do what the
+                        * advice asks for. The tag is validated even when the match is
+                        * inexact.
+                        */
+                       switch (entry->tag)
+                       {
+                               case PGPA_TAG_PARTITIONWISE:
+                                       if (exact)
+                                               want_pw = 1;
+                                       break;
+                               case PGPA_TAG_GATHER:
+                                       if (exact)
+                                               want_gather = PGS_GATHER;
+                                       break;
+                               case PGPA_TAG_GATHER_MERGE:
+                                       if (exact)
+                                               want_gather = PGS_GATHER_MERGE;
+                                       break;
+                               default:
+                                       elog(ERROR, "unexpected advice tag: %d",
+                                                (int) entry->tag);
+                                       break;
+                       }
+
+                       /*
+                        * When the target is not exactly this joinrel, all three tags
+                        * behave alike: no partitionwise join and no Gather or Gather
+                        * Merge at this level.
+                        */
+                       if (!exact)
+                       {
+                               want_pw = -1;
+                               want_gather = PGS_CONSIDER_NONPARTIAL;
+                       }
+               }
+
+               /*
+                * Fold this entry's Gather request into the running state: detect
+                * disagreement with earlier entries, remember the latest mask, and
+                * file the entry under exact or inexact matches.
+                */
+               if (want_gather != 0)
+               {
+                       Bitmapset **set = exact ? &gather_exact : &gather_inexact;
+
+                       if (gather_mask != 0 && gather_mask != want_gather)
+                               gather_conflict = true;
+                       gather_mask = want_gather;
+                       *set = bms_add_member(*set, idx);
+               }
+
+               /* Same bookkeeping for the partitionwise request. */
+               if (want_pw != 0)
+               {
+                       Bitmapset **set = exact ? &pw_exact : &pw_inexact;
+
+                       if (pw_outcome != 0 && pw_outcome != want_pw)
+                               pw_conflict = true;
+                       pw_outcome = want_pw;
+                       *set = bms_add_member(*set, idx);
+               }
+       }
+
+       /*
+        * Flag the Gather-related entries: all of them are partial matches,
+        * exact ones are additionally full matches, and a conflict taints every
+        * one of them.
+        */
+       te_flags = PGPA_TE_MATCH_PARTIAL |
+               (gather_conflict ? PGPA_TE_CONFLICTING : 0);
+       pgpa_trove_set_flags(pjs->rel_entries, gather_inexact, te_flags);
+       pgpa_trove_set_flags(pjs->rel_entries, gather_exact,
+                                                te_flags | PGPA_TE_MATCH_FULL);
+
+       /* Flag the partitionwise entries in the same manner. */
+       te_flags = PGPA_TE_MATCH_PARTIAL |
+               (pw_conflict ? PGPA_TE_CONFLICTING : 0);
+       pgpa_trove_set_flags(pjs->rel_entries, pw_inexact, te_flags);
+       pgpa_trove_set_flags(pjs->rel_entries, pw_exact,
+                                                te_flags | PGPA_TE_MATCH_FULL);
+
+       /* Enforce a non-conflicting Gather specification, if there is one. */
+       if (gather_mask != 0 && !gather_conflict)
+               *pgs_mask_p = (*pgs_mask_p &
+                                          ~(PGS_GATHER | PGS_GATHER_MERGE | PGS_CONSIDER_NONPARTIAL)) |
+                       gather_mask;
+
+       /* Without a usable partitionwise specification, we're done. */
+       if (pw_outcome == 0 || pw_conflict)
+               return;
+
+       /*
+        * Forcing a partitionwise join means turning off every ordinary join
+        * strategy and leaving only Append and MergeAppend available. Forbidding
+        * one means turning off just Append and MergeAppend; in that case
+        * PGS_CONSIDER_PARTITIONWISE must stay untouched, because a higher join
+        * level built from this level's paths might still want a partitionwise
+        * join.
+        */
+       if (pw_outcome > 0)
+       {
+               *pgs_mask_p &= ~PGS_JOIN_ANY;
+               *pgs_mask_p |=
+                       PGS_APPEND | PGS_MERGE_APPEND | PGS_CONSIDER_PARTITIONWISE;
+       }
+       else
+               *pgs_mask_p &= ~(PGS_APPEND | PGS_MERGE_APPEND);
+}
+
+/*
+ * Enforce restrictions on the join order or join method.
+ *
+ * jointype is the join type currently under consideration, and pjs carries
+ * the proposed join (outer_count, inner_count, rids) together with the
+ * possibly-relevant advice (join_entries, selected by join_indexes). On
+ * return, *pgs_mask_p has been zeroed if the advice unambiguously rejects
+ * this join, or has had its PGS_JOIN_ANY bits replaced if the advice
+ * unambiguously mandates a join method. Conflicting advice is flagged with
+ * PGPA_TE_CONFLICTING and is not enforced. plan_name is not used here.
+ *
+ * Note that, although it is possible to view PARTITIONWISE advice as
+ * controlling the join method, we can't enforce it here, because the code
+ * path where this executes only deals with join paths that are built directly
+ * from a single outer path and a single inner path.
+ */
+static void
+pgpa_planner_apply_join_path_advice(JoinType jointype, uint64 *pgs_mask_p,
+                                                                       char *plan_name,
+                                                                       pgpa_join_state *pjs)
+{
+       int                     i = -1;
+       Bitmapset  *jo_permit_indexes = NULL;
+       Bitmapset  *jo_deny_indexes = NULL;
+       Bitmapset  *jm_indexes = NULL;
+       bool            jm_conflict = false;
+
+       /*
+        * Strategy masks are 64 bits wide, matching both *pgs_mask_p and the
+        * return type of pgpa_join_strategy_mask_from_advice_tag(); a narrower
+        * variable would silently drop any high-order bits.
+        */
+       uint64          join_mask = 0;
+
+       /* Iterate over all possibly-relevant advice. */
+       while ((i = bms_next_member(pjs->join_indexes, i)) >= 0)
+       {
+               pgpa_trove_entry *entry = &pjs->join_entries[i];
+               uint64          my_join_mask;
+
+               /* Handle join order advice. */
+               if (entry->tag == PGPA_TAG_JOIN_ORDER)
+               {
+                       if (pgpa_join_order_permits_join(pjs->outer_count,
+                                                                                        pjs->inner_count,
+                                                                                        pjs->rids,
+                                                                                        entry))
+                               jo_permit_indexes = bms_add_member(jo_permit_indexes, i);
+                       else
+                               jo_deny_indexes = bms_add_member(jo_deny_indexes, i);
+                       continue;
+               }
+
+               /* Handle join strategy advice. */
+               my_join_mask = pgpa_join_strategy_mask_from_advice_tag(entry->tag);
+               if (my_join_mask != 0)
+               {
+                       bool            permit;
+                       bool            restrict_method;
+
+                       if (entry->tag == PGPA_TAG_FOREIGN_JOIN)
+                               permit = pgpa_opaque_join_permits_join(pjs->outer_count,
+                                                                                                          pjs->inner_count,
+                                                                                                          pjs->rids,
+                                                                                                          entry,
+                                                                                                          &restrict_method);
+                       else
+                               permit = pgpa_join_method_permits_join(pjs->outer_count,
+                                                                                                          pjs->inner_count,
+                                                                                                          pjs->rids,
+                                                                                                          entry,
+                                                                                                          &restrict_method);
+                       if (!permit)
+                               jo_deny_indexes = bms_add_member(jo_deny_indexes, i);
+                       else if (restrict_method)
+                       {
+                               jo_permit_indexes = bms_add_member(jo_permit_indexes, i);
+
+                               /*
+                                * Track method-restricting entries in a set of their own.
+                                * This must add to jm_indexes, not to jo_permit_indexes:
+                                * otherwise the two variables would point at the same
+                                * Bitmapset, so that a join method conflict would also
+                                * flag unrelated join order entries, and a later
+                                * reallocation of jo_permit_indexes could leave
+                                * jm_indexes dangling.
+                                */
+                               jm_indexes = bms_add_member(jm_indexes, i);
+                               if (join_mask != 0 && join_mask != my_join_mask)
+                                       jm_conflict = true;
+                               join_mask = my_join_mask;
+                       }
+                       continue;
+               }
+
+               /* Handle semijoin uniqueness advice. */
+               if (entry->tag == PGPA_TAG_SEMIJOIN_UNIQUE ||
+                       entry->tag == PGPA_TAG_SEMIJOIN_NON_UNIQUE)
+               {
+                       bool            advice_unique;
+                       bool            jt_unique;
+                       bool            jt_non_unique;
+                       bool            restrict_method;
+
+                       /* Advice wants to unique-ify and use a regular join? */
+                       advice_unique = (entry->tag == PGPA_TAG_SEMIJOIN_UNIQUE);
+
+                       /* Planner is trying to unique-ify and use a regular join? */
+                       jt_unique = (jointype == JOIN_UNIQUE_INNER ||
+                                                jointype == JOIN_UNIQUE_OUTER);
+
+                       /* Planner is trying a semi-join, without unique-ifying? */
+                       jt_non_unique = (jointype == JOIN_SEMI ||
+                                                        jointype == JOIN_RIGHT_SEMI);
+
+                       /*
+                        * These advice tags behave very much like join method advice, in
+                        * that they want the inner side of the semijoin to match the
+                        * relations listed in the advice. Hence, we test whether join
+                        * method advice would enforce a join order restriction here, and
+                        * disallow the join if not.
+                        *
+                        * XXX. Think harder about right semijoins.
+                        */
+                       if (!pgpa_join_method_permits_join(pjs->outer_count,
+                                                                                          pjs->inner_count,
+                                                                                          pjs->rids,
+                                                                                          entry,
+                                                                                          &restrict_method))
+                               jo_deny_indexes = bms_add_member(jo_deny_indexes, i);
+                       else if (restrict_method)
+                       {
+                               jo_permit_indexes = bms_add_member(jo_permit_indexes, i);
+                               if (!jt_unique && !jt_non_unique)
+                               {
+                                       /*
+                                        * This doesn't seem to be a semijoin to which SJ_UNIQUE
+                                        * or SJ_NON_UNIQUE can be applied.
+                                        */
+                                       entry->flags |= PGPA_TE_INAPPLICABLE;
+                               }
+                               else if (advice_unique != jt_unique)
+                               {
+                                       /*
+                                        * NOTE(review): this entry is now a member of both the
+                                        * permit and the deny set, so the logic below treats
+                                        * it as conflicting with itself and does not reject
+                                        * the join. Confirm that this is the intended way to
+                                        * handle a uniqueness mismatch.
+                                        */
+                                       jo_deny_indexes = bms_add_member(jo_deny_indexes, i);
+                               }
+                       }
+                       continue;
+               }
+       }
+
+       /*
+        * If the advice indicates both that this join order is permissible and
+        * also that it isn't, then mark advice related to the join order as
+        * conflicting.
+        */
+       if (jo_permit_indexes != NULL && jo_deny_indexes != NULL)
+       {
+               pgpa_trove_set_flags(pjs->join_entries, jo_permit_indexes,
+                                                        PGPA_TE_CONFLICTING);
+               pgpa_trove_set_flags(pjs->join_entries, jo_deny_indexes,
+                                                        PGPA_TE_CONFLICTING);
+       }
+
+       /*
+        * If more than one join method specification is relevant here and they
+        * differ, mark them all as conflicting.
+        */
+       if (jm_conflict)
+               pgpa_trove_set_flags(pjs->join_entries, jm_indexes,
+                                                        PGPA_TE_CONFLICTING);
+
+       /*
+        * If we were advised to deny this join order, then do so. However, if we
+        * were also advised to permit it, then do nothing, since the advice
+        * conflicts.
+        */
+       if (jo_deny_indexes != NULL && jo_permit_indexes == NULL)
+               *pgs_mask_p = 0;
+
+       /*
+        * If we were advised to restrict the join method, then do so. However, if
+        * we got conflicting join method advice or were also advised to reject
+        * this join order completely, then instead do nothing.
+        */
+       if (join_mask != 0 && !jm_conflict && jo_deny_indexes == NULL)
+               *pgs_mask_p = (*pgs_mask_p & ~PGS_JOIN_ANY) | join_mask;
+}
+
+/*
+ * Map an advice tag onto the corresponding path generation strategy bit.
+ *
+ * Callers may pass tags that do not describe a join strategy at all. For
+ * those, the result is 0, which is distinguishable from every real mask.
+ */
+static uint64
+pgpa_join_strategy_mask_from_advice_tag(pgpa_advice_tag_type tag)
+{
+       uint64          mask = 0;
+
+       if (tag == PGPA_TAG_FOREIGN_JOIN)
+               mask = PGS_FOREIGNJOIN;
+       else if (tag == PGPA_TAG_MERGE_JOIN_PLAIN)
+               mask = PGS_MERGEJOIN_PLAIN;
+       else if (tag == PGPA_TAG_MERGE_JOIN_MATERIALIZE)
+               mask = PGS_MERGEJOIN_MATERIALIZE;
+       else if (tag == PGPA_TAG_NESTED_LOOP_PLAIN)
+               mask = PGS_NESTLOOP_PLAIN;
+       else if (tag == PGPA_TAG_NESTED_LOOP_MATERIALIZE)
+               mask = PGS_NESTLOOP_MATERIALIZE;
+       else if (tag == PGPA_TAG_NESTED_LOOP_MEMOIZE)
+               mask = PGS_NESTLOOP_MEMOIZE;
+       else if (tag == PGPA_TAG_HASH_JOIN)
+               mask = PGS_HASHJOIN;
+
+       /* Any tag not listed above is not a join strategy; mask stays 0. */
+       return mask;
+}
+
+/*
+ * Does a certain item of join order advice permit a certain join?
+ *
+ * The proposed join has the first outer_count identifiers of rids on its
+ * outer side and the following inner_count identifiers on its inner side.
+ * entry is the JOIN_ORDER advice to test against. Returns true if the advice
+ * allows this join and false if it rules it out.
+ *
+ * As a side effect, entry is always flagged PGPA_TE_MATCH_PARTIAL, and is
+ * additionally flagged PGPA_TE_MATCH_FULL when this join completes the
+ * selected join order list.
+ */
+static bool
+pgpa_join_order_permits_join(int outer_count, int inner_count,
+                                                        pgpa_identifier *rids,
+                                                        pgpa_trove_entry *entry)
+{
+       bool            loop = true;
+       bool            sublist = false;
+       int                     length;
+       int                     outer_length;
+       pgpa_advice_target *target = entry->target;
+       pgpa_advice_target *prefix_target;
+
+       /* We definitely have at least a partial match for this trove entry. */
+       entry->flags |= PGPA_TE_MATCH_PARTIAL;
+
+       /*
+        * Find the innermost sublist that contains all keys; if no sublist does,
+        * then continue processing with the toplevel list.
+        *
+        * For example, if the advice says JOIN_ORDER(t1 t2 (t3 t4 t5)), then we
+        * should evaluate joins that only involve t3, t4, and/or t5 against the
+        * (t3 t4 t5) sublist, and others against the full list.
+        *
+        * Note that (1) outermost sublist is always ordered and (2) whenever we
+        * zoom into an unordered sublist, we instantly accept the proposed join.
+        * If the advice says JOIN_ORDER(t1 t2 {t3 t4 t5}), any approach to
+        * joining t3, t4, and/or t5 is acceptable.
+        */
+       while (loop)
+       {
+               Assert(target->ttype == PGPA_TARGET_ORDERED_LIST);
+
+               loop = false;
+               foreach_ptr(pgpa_advice_target, child_target, target->children)
+               {
+                       pgpa_itm_type itm;
+
+                       /* A bare identifier is not a sublist; nothing to zoom into. */
+                       if (child_target->ttype == PGPA_TARGET_IDENTIFIER)
+                               continue;
+
+                       itm = pgpa_identifiers_match_target(outer_count + inner_count,
+                                                                                               rids, child_target);
+                       if (itm == PGPA_ITM_EQUAL || itm == PGPA_ITM_KEYS_ARE_SUBSET)
+                       {
+                               if (child_target->ttype == PGPA_TARGET_ORDERED_LIST)
+                               {
+                                       /* Restart the scan one level further down. */
+                                       target = child_target;
+                                       sublist = true;
+                                       loop = true;
+                                       break;
+                               }
+                               else
+                               {
+                                       Assert(child_target->ttype == PGPA_TARGET_UNORDERED_LIST);
+                                       return true;
+                               }
+                       }
+               }
+       }
+
+       /*
+        * Try to find a prefix of the selected join order list that is exactly
+        * equal to the outer side of the proposed join.
+        *
+        * prefix_target is a scratch node used only for these lookups; it and
+        * the list it holds are released on every path out of this function
+        * from here on, so that repeated calls do not accumulate garbage.
+        */
+       length = list_length(target->children);
+       prefix_target = palloc0_object(pgpa_advice_target);
+       prefix_target->ttype = PGPA_TARGET_ORDERED_LIST;
+       for (outer_length = 1; outer_length <= length; ++outer_length)
+       {
+               pgpa_itm_type itm;
+
+               /* Avoid leaking memory in every loop iteration. */
+               if (prefix_target->children != NULL)
+                       list_free(prefix_target->children);
+               prefix_target->children = list_copy_head(target->children,
+                                                                                                outer_length);
+
+               /* Search, hoping to find an exact match. */
+               itm = pgpa_identifiers_match_target(outer_count, rids, prefix_target);
+               if (itm == PGPA_ITM_EQUAL)
+                       break;
+
+               /*
+                * If the prefix of the join order list that we're considering
+                * includes some but not all of the outer rels, we can make the prefix
+                * longer to find an exact match. But if the advice hasn't mentioned
+                * everything that's part of our outer rel yet, but has mentioned
+                * things that are not, then this join doesn't match the join order
+                * list.
+                */
+               if (itm != PGPA_ITM_TARGETS_ARE_SUBSET)
+               {
+                       list_free(prefix_target->children);
+                       pfree(prefix_target);
+                       return false;
+               }
+       }
+
+       /*
+        * The scratch prefix is not needed past this point; only outer_length
+        * matters from here on. list_free() accepts an empty list, which covers
+        * the case where the loop above never ran.
+        */
+       list_free(prefix_target->children);
+       pfree(prefix_target);
+
+       /*
+        * If the previous loop stopped before the prefix_target included the
+        * entire join order list, then the next member of the join order list
+        * must exactly match the inner side of the join.
+        *
+        * Example: Given JOIN_ORDER(t1 t2 (t3 t4 t5)), if the outer side of the
+        * current join includes only t1, then the inner side must be exactly t2;
+        * if the outer side includes both t1 and t2, then the inner side must
+        * include exactly t3, t4, and t5.
+        */
+       if (outer_length < length)
+       {
+               pgpa_advice_target *inner_target;
+               pgpa_itm_type itm;
+
+               inner_target = list_nth(target->children, outer_length);
+
+               itm = pgpa_identifiers_match_target(inner_count, rids + outer_count,
+                                                                                       inner_target);
+
+               /*
+                * Before returning, consider whether we need to mark this entry as
+                * fully matched. If we found every item but one on the lefthand side
+                * of the join and the last item on the righthand side of the join,
+                * then the answer is yes.
+                */
+               if (outer_length + 1 == length && itm == PGPA_ITM_EQUAL)
+                       entry->flags |= PGPA_TE_MATCH_FULL;
+
+               return (itm == PGPA_ITM_EQUAL);
+       }
+
+       /*
+        * If we get here, then the outer side of the join includes the entirety
+        * of the join order list. In this case, we behave differently depending
+        * on whether we're looking at the top-level join order list or sublist.
+        * At the top-level, we treat the specified list as mandating that the
+        * actual join order has the given list as a prefix, but a sublist
+        * requires an exact match.
+        *
+        * Example: Given JOIN_ORDER(t1 t2 (t3 t4 t5)), we must start by joining
+        * all five of those relations and in that sequence, but once that is
+        * done, it's OK to join any other rels that are part of the join problem.
+        * This allows a user to specify the driving table and perhaps the first
+        * few things to which it should be joined while leaving the rest of the
+        * join order up to the optimizer. But it seems like it would be
+        * surprising, given that specification, if the user could add t6 to the
+        * (t3 t4 t5) sub-join, so we don't allow that. If we did want to allow
+        * it, the logic earlier in this function would require substantial
+        * adjustment: we could allow the t3-t4-t5-t6 join to be built here, but
+        * the next step of joining t1-t2 to the result would still be rejected.
+        */
+       return !sublist;
+}
+
+/*
+ * Decide whether one item of join method advice allows a proposed join.
+ *
+ * Advice such as HASH_JOIN((x y)) asks for a hash join whose inner side is
+ * exactly x and y. A proposed join with exactly x and y on the inner side
+ * must therefore be forced to use a hash join. Beyond that, some join orders
+ * have to be refused outright: for example, a join with exactly x and y on
+ * the outer side cannot be allowed, since its paths might beat the
+ * advice-driven path on cost.
+ *
+ * The return value says whether the join may proceed at all, and
+ * *restrict_method reports whether the join method named by the advice must
+ * be enforced for it. The first outer_count entries of rids describe the
+ * outer side; the next inner_count entries describe the inner side.
+ */
+static bool
+pgpa_join_method_permits_join(int outer_count, int inner_count,
+                                                         pgpa_identifier *rids,
+                                                         pgpa_trove_entry *entry,
+                                                         bool *restrict_method)
+{
+       pgpa_advice_target *advice_target = entry->target;
+       pgpa_itm_type itm;
+
+       /* Being asked at all means this trove entry matches at least in part. */
+       entry->flags |= PGPA_TE_MATCH_PARTIAL;
+
+       /* Unless the inner side matches exactly, no method is enforced. */
+       *restrict_method = false;
+
+       /*
+        * First compare the inner side with the advice target.
+        *
+        * An exact match is the join this advice is about: allow it and insist
+        * on the advised method. If the inner side is a superset of the target,
+        * the join of interest has already happened further down, and this
+        * advice has nothing more to say.
+        */
+       itm = pgpa_identifiers_match_target(inner_count,
+                                                                               rids + outer_count,
+                                                                               advice_target);
+       switch (itm)
+       {
+               case PGPA_ITM_EQUAL:
+                       *restrict_method = true;
+                       entry->flags |= PGPA_TE_MATCH_FULL;
+                       return true;
+               case PGPA_ITM_TARGETS_ARE_SUBSET:
+                       return true;
+               default:
+                       break;
+       }
+
+       /*
+        * Next compare the outer side with the advice target.
+        *
+        * An exact match means the planner is trying the join with its sides
+        * swapped relative to what the advice wants, so refuse. If the outer
+        * side is a superset of the target, the join of interest has already
+        * happened and no restriction applies.
+        */
+       itm = pgpa_identifiers_match_target(outer_count, rids, advice_target);
+       if (itm == PGPA_ITM_EQUAL)
+               return false;
+       if (itm == PGPA_ITM_TARGETS_ARE_SUBSET)
+               return true;
+
+       /*
+        * With a single-relation target, the remaining test can never succeed,
+        * so skip the lookup and refuse right away.
+        */
+       if (advice_target->ttype == PGPA_TARGET_IDENTIFIER)
+               return false;
+
+       /*
+        * Finally compare the whole joinrel with the advice target.
+        *
+        * If everything in the joinrel appears in the target, we are still below
+        * the level of the join being controlled; for instance, HASH_JOIN((x y))
+        * says nothing about how x and y themselves are joined. Allow that.
+        *
+        * Disjoint advice should never have been handed out by the trove, hence
+        * the assertion.
+        *
+        * Anything else is refused: the advice overlaps this join without being
+        * contained in either side, and the join also involves at least one
+        * relation the advice does not mention. In the HASH_JOIN((x y)) example,
+        * that is x on one side, y on the other, and some further relation t on
+        * either side. Accepting it would let the (x y t) joinrel hold
+        * non-disabled paths that never put (x y) on the inner side of a hash
+        * join, such as (x JOIN t) JOIN y.
+        */
+       itm = pgpa_identifiers_match_target(outer_count + inner_count,
+                                                                               rids, advice_target);
+       Assert(itm != PGPA_ITM_DISJOINT);
+
+       return (itm == PGPA_ITM_KEYS_ARE_SUBSET || itm == PGPA_ITM_EQUAL);
+}
+
+/*
+ * Decide whether advice about an opaque join allows a proposed join.
+ *
+ * An opaque join is one whose internal mechanics PostgreSQL cannot see. At
+ * present only foreign joins qualify: FOREIGN_JOIN((x y z)) states that x, y,
+ * and z are joined remotely, without revealing the join order or the join
+ * methods used on the remote side.
+ *
+ * The return value says whether the join may proceed, and *restrict_method
+ * reports whether the opaque join method must be enforced for it.
+ */
+static bool
+pgpa_opaque_join_permits_join(int outer_count, int inner_count,
+                                                         pgpa_identifier *rids,
+                                                         pgpa_trove_entry *entry,
+                                                         bool *restrict_method)
+{
+       pgpa_itm_type whole_join_itm;
+
+       /* Being asked at all means this trove entry matches at least in part. */
+       entry->flags |= PGPA_TE_MATCH_PARTIAL;
+
+       /* Only an exact match, handled below, enforces the method. */
+       *restrict_method = false;
+
+       /* Sides are irrelevant here; compare the joinrel as a whole. */
+       whole_join_itm = pgpa_identifiers_match_target(outer_count + inner_count,
+                                                                                                  rids, entry->target);
+
+       switch (whole_join_itm)
+       {
+               case PGPA_ITM_EQUAL:
+
+                       /*
+                        * This is precisely the join named by the advice: allow it and
+                        * require the opaque join method.
+                        */
+                       entry->flags |= PGPA_TE_MATCH_FULL;
+                       *restrict_method = true;
+                       return true;
+
+               case PGPA_ITM_KEYS_ARE_SUBSET:
+               case PGPA_ITM_TARGETS_ARE_SUBSET:
+
+                       /*
+                        * With PGPA_ITM_TARGETS_ARE_SUBSET, the join of interest has
+                        * already been formed, so nothing further is restricted.
+                        *
+                        * With PGPA_ITM_KEYS_ARE_SUBSET, we are still working towards
+                        * the join of interest and have not pulled in any relation the
+                        * advice leaves out. ForeignScan paths for joins are assembled
+                        * from the ForeignScan paths of the underlying joins and
+                        * scans, so joins over a subset of the wanted relations must
+                        * stay enabled.
+                        */
+                       return true;
+
+               default:
+                       break;
+       }
+
+       /*
+        * The advice overlaps this join, but the join also contains at least one
+        * relation the advice does not mention. Such paths are disabled so that
+        * the join really is pushed down as intended.
+        */
+       return false;
+}
+
+/*
+ * Apply scan advice to a RelOptInfo.
+ *
+ * 'scan_entries' is an array of trove entries and 'scan_indexes' is the set
+ * of offsets into that array that should be examined for scan-method advice
+ * (BITMAP_HEAP_SCAN, INDEX_ONLY_SCAN, INDEX_SCAN, SEQ_SCAN, TID_SCAN).
+ * Likewise, 'rel_entries' and 'rel_indexes' identify the entries examined
+ * for PARTITIONWISE, GATHER, GATHER_MERGE, and NO_GATHER advice.
+ *
+ * Non-conflicting advice is enforced by adjusting rel->pgs_mask and, for
+ * INDEX_SCAN and INDEX_ONLY_SCAN advice, by reducing rel->indexlist to the
+ * single named index. The flags of the examined trove entries are updated
+ * to record partial/full matches, conflicts, and inapplicable advice.
+ *
+ * XXX. For bitmap heap scans, we're just ignoring the index information from
+ * the advice. That's not cool.
+ */
+static void
+pgpa_planner_apply_scan_advice(RelOptInfo *rel,
+                                                          pgpa_trove_entry *scan_entries,
+                                                          Bitmapset *scan_indexes,
+                                                          pgpa_trove_entry *rel_entries,
+                                                          Bitmapset *rel_indexes)
+{
+       bool            gather_conflict = false;
+       Bitmapset  *gather_partial_match = NULL;
+       Bitmapset  *gather_full_match = NULL;
+       int                     i = -1;
+       pgpa_trove_entry *scan_entry = NULL;
+       int                     flags;
+       bool            scan_type_conflict = false;
+       Bitmapset  *scan_type_indexes = NULL;
+       Bitmapset  *scan_type_rel_indexes = NULL;
+       uint64          gather_mask = 0;
+       uint64          scan_type = 0;
+
+       /* Scrutinize available scan advice. */
+       while ((i = bms_next_member(scan_indexes, i)) >= 0)
+       {
+               pgpa_trove_entry *my_entry = &scan_entries[i];
+               uint64          my_scan_type = 0;
+
+               /* Translate our advice tags to a scan strategy advice value. */
+               if (my_entry->tag == PGPA_TAG_BITMAP_HEAP_SCAN)
+                       my_scan_type = PGS_BITMAPSCAN;
+               else if (my_entry->tag == PGPA_TAG_INDEX_ONLY_SCAN)
+                       my_scan_type = PGS_INDEXONLYSCAN | PGS_CONSIDER_INDEXONLY;
+               else if (my_entry->tag == PGPA_TAG_INDEX_SCAN)
+                       my_scan_type = PGS_INDEXSCAN;
+               else if (my_entry->tag == PGPA_TAG_SEQ_SCAN)
+                       my_scan_type = PGS_SEQSCAN;
+               else if (my_entry->tag == PGPA_TAG_TID_SCAN)
+                       my_scan_type = PGS_TIDSCAN;
+
+               /*
+                * If this is understandable scan advice, hang on to the entry, the
+                * inferred scan type, and the index at which we found it.
+                *
+                * Also make a note if we see conflicting scan type advice. Note that
+                * we regard two index specifications as conflicting unless they match
+                * exactly. In theory, perhaps we could regard INDEX_SCAN(a c) and
+                * INDEX_SCAN(a b.c) as non-conflicting if it happens that the only
+                * index named c is in schema b, but it doesn't seem worth the code.
+                */
+               if (my_scan_type != 0)
+               {
+                       if (scan_type != 0 && scan_type != my_scan_type)
+                               scan_type_conflict = true;
+                       if (!scan_type_conflict && scan_entry != NULL &&
+                               my_entry->target->itarget != NULL &&
+                               scan_entry->target->itarget != NULL &&
+                               !pgpa_index_targets_equal(scan_entry->target->itarget,
+                                                                                 my_entry->target->itarget))
+                               scan_type_conflict = true;
+                       scan_entry = my_entry;
+                       scan_type = my_scan_type;
+                       scan_type_indexes = bms_add_member(scan_type_indexes, i);
+               }
+       }
+
+       /* Scrutinize available gather-related and partitionwise advice. */
+       i = -1;
+       while ((i = bms_next_member(rel_indexes, i)) >= 0)
+       {
+               pgpa_trove_entry *my_entry = &rel_entries[i];
+               uint64          my_gather_mask = 0;
+               bool            just_one_rel;
+
+               /* A bare identifier or a one-element list targets a single rel. */
+               just_one_rel = my_entry->target->ttype == PGPA_TARGET_IDENTIFIER
+                       || list_length(my_entry->target->children) == 1;
+
+               /*
+                * PARTITIONWISE behaves like a scan type, except that if there's more
+                * than one relation targeted, it has no effect at this level.
+                */
+               if (my_entry->tag == PGPA_TAG_PARTITIONWISE)
+               {
+                       if (just_one_rel)
+                       {
+                               const uint64 my_scan_type = PGS_APPEND | PGS_MERGE_APPEND;
+
+                               if (scan_type != 0 && scan_type != my_scan_type)
+                                       scan_type_conflict = true;
+                               scan_entry = my_entry;
+                               scan_type = my_scan_type;
+                               scan_type_rel_indexes =
+                                       bms_add_member(scan_type_rel_indexes, i);
+                       }
+                       continue;
+               }
+
+               /*
+                * GATHER and GATHER_MERGE applied to a single rel mean that we should
+                * use the corresponding strategy here, while applying either to more
+                * than one rel means we should not use those strategies here, but
+                * rather at the level of the joinrel that corresponds to what was
+                * specified. NO_GATHER can only be applied to single rels.
+                *
+                * Note that setting PGS_CONSIDER_NONPARTIAL in my_gather_mask is
+                * equivalent to allowing the non-use of either form of Gather here.
+                */
+               if (my_entry->tag == PGPA_TAG_GATHER ||
+                       my_entry->tag == PGPA_TAG_GATHER_MERGE)
+               {
+                       if (!just_one_rel)
+                               my_gather_mask = PGS_CONSIDER_NONPARTIAL;
+                       else if (my_entry->tag == PGPA_TAG_GATHER)
+                               my_gather_mask = PGS_GATHER;
+                       else
+                               my_gather_mask = PGS_GATHER_MERGE;
+               }
+               else if (my_entry->tag == PGPA_TAG_NO_GATHER)
+               {
+                       Assert(just_one_rel);
+                       my_gather_mask = PGS_CONSIDER_NONPARTIAL;
+               }
+
+               /*
+                * If we set my_gather_mask up above, then we (1) make a note if the
+                * advice conflicted, (2) remember the mask value, and (3) remember
+                * whether this was a full or partial match.
+                */
+               if (my_gather_mask != 0)
+               {
+                       if (gather_mask != 0 && gather_mask != my_gather_mask)
+                               gather_conflict = true;
+                       gather_mask = my_gather_mask;
+                       if (just_one_rel)
+                               gather_full_match = bms_add_member(gather_full_match, i);
+                       else
+                               gather_partial_match = bms_add_member(gather_partial_match, i);
+               }
+       }
+
+       /* Enforce choice of index. */
+       if (scan_entry != NULL && !scan_type_conflict &&
+               (scan_entry->tag == PGPA_TAG_INDEX_SCAN ||
+                scan_entry->tag == PGPA_TAG_INDEX_ONLY_SCAN))
+       {
+               pgpa_index_target *itarget = scan_entry->target->itarget;
+               IndexOptInfo *matched_index = NULL;
+
+               Assert(itarget->itype == PGPA_INDEX_NAME);
+
+               /*
+                * Look for an index on this rel whose name matches the advice; the
+                * schema is compared only when the advice specifies one.
+                */
+               foreach_node(IndexOptInfo, index, rel->indexlist)
+               {
+                       char       *relname = get_rel_name(index->indexoid);
+                       Oid                     nspoid = get_rel_namespace(index->indexoid);
+                       char       *relnamespace = get_namespace_name(nspoid);
+
+                       if (strcmp(itarget->indname, relname) == 0 &&
+                               (itarget->indnamespace == NULL ||
+                                strcmp(itarget->indnamespace, relnamespace) == 0))
+                       {
+                               matched_index = index;
+                               break;
+                       }
+               }
+
+               if (matched_index == NULL)
+               {
+                       /* Don't force the scan type if the index doesn't exist. */
+                       scan_type = 0;
+
+                       /* Mark advice as inapplicable. */
+                       pgpa_trove_set_flags(scan_entries, scan_type_indexes,
+                                                                PGPA_TE_INAPPLICABLE);
+               }
+               else
+               {
+                       /* Retain this index and discard the rest. */
+                       rel->indexlist = list_make1(matched_index);
+               }
+       }
+
+       /*
+        * Mark all the scan method entries as fully matched; and if they specify
+        * different things, mark them all as conflicting.
+        */
+       flags = PGPA_TE_MATCH_PARTIAL | PGPA_TE_MATCH_FULL;
+       if (scan_type_conflict)
+               flags |= PGPA_TE_CONFLICTING;
+       pgpa_trove_set_flags(scan_entries, scan_type_indexes, flags);
+       pgpa_trove_set_flags(rel_entries, scan_type_rel_indexes, flags);
+
+       /*
+        * Mark every Gather-related piece of advice as partially matched. Mark
+        * the ones that included this relation as a target by itself as fully
+        * matched. If there was a conflict, mark them all as conflicting.
+        */
+       flags = PGPA_TE_MATCH_PARTIAL;
+       if (gather_conflict)
+               flags |= PGPA_TE_CONFLICTING;
+       pgpa_trove_set_flags(rel_entries, gather_partial_match, flags);
+       flags |= PGPA_TE_MATCH_FULL;
+       pgpa_trove_set_flags(rel_entries, gather_full_match, flags);
+
+       /*
+        * If there is a non-conflicting scan specification, enforce it: clear
+        * every scan-method and append-related bit, then set only the advised
+        * ones.
+        */
+       if (scan_type != 0 && !scan_type_conflict)
+       {
+               rel->pgs_mask &=
+                       ~(PGS_SCAN_ANY | PGS_APPEND | PGS_MERGE_APPEND |
+                         PGS_CONSIDER_INDEXONLY);
+               rel->pgs_mask |= scan_type;
+       }
+
+       /*
+        * If there is a non-conflicting gather specification, enforce it in the
+        * same clear-then-set fashion.
+        */
+       if (gather_mask != 0 && !gather_conflict)
+       {
+               rel->pgs_mask &=
+                       ~(PGS_GATHER | PGS_GATHER_MERGE | PGS_CONSIDER_NONPARTIAL);
+               rel->pgs_mask |= gather_mask;
+       }
+}
+
+/*
+ * Add feedback entries for one trove slice to the provided list and
+ * return the resulting list.
+ *
+ * 'trove' and 'type' select the slice of trove entries to report on. One
+ * DefElem is appended to 'list' per entry: its name is the string form of
+ * the entry, as produced by pgpa_cstring_trove_entry(), and its argument is
+ * an Integer node holding the entry's flags.
+ *
+ * Feedback entries are generated from the trove entry's flags. It's assumed
+ * that the caller has already set all relevant flags with the exception of
+ * PGPA_TE_FAILED. We set that flag here if appropriate, using 'walker' and
+ * 'rt_identifiers' to ask whether the final plan would itself have produced
+ * the advice in question.
+ */
+static List *
+pgpa_planner_append_feedback(List *list, pgpa_trove *trove,
+                                                        pgpa_trove_lookup_type type,
+                                                        pgpa_identifier *rt_identifiers,
+                                                        pgpa_plan_walker_context *walker)
+{
+       pgpa_trove_entry *entries;
+       int                     nentries;
+
+       pgpa_trove_lookup_all(trove, type, &entries, &nentries);
+       for (int i = 0; i < nentries; ++i)
+       {
+               pgpa_trove_entry *entry = &entries[i];
+               DefElem    *item;
+
+               /*
+                * If this entry was fully matched, check whether generating advice
+                * from this plan would produce such an entry. If not, label the entry
+                * as failed.
+                */
+               if ((entry->flags & PGPA_TE_MATCH_FULL) != 0 &&
+                       !pgpa_walker_would_advise(walker, rt_identifiers,
+                                                                         entry->tag, entry->target))
+                       entry->flags |= PGPA_TE_FAILED;
+
+               /* Flags must be final before we capture them in the DefElem. */
+               item = makeDefElem(pgpa_cstring_trove_entry(entry),
+                                                  (Node *) makeInteger(entry->flags), -1);
+               list = lappend(list, item);
+       }
+
+       return list;
+}
+
+#ifdef USE_ASSERT_CHECKING
+
+/*
+ * Hash a key made up of an RTI and a (possibly NULL) plan name, using the
+ * fasthash primitives.
+ */
+static uint32
+pgpa_ri_checker_hash_key(pgpa_ri_checker_key key)
+{
+       fasthash_state state;
+       int                     name_len = 0;
+
+       fasthash_init(&state, 0);
+
+       /* Mix in the range table index first. */
+       state.accum = key.rti;
+       fasthash_combine(&state);
+
+       /* Then the plan name, if there is one; a NULL name has length zero. */
+       if (key.plan_name != NULL)
+               name_len = fasthash_accum_cstring(&state, key.plan_name);
+
+       /* hashfn_unstable.h recommends using string length as tweak */
+       return fasthash_final32(&state, name_len);
+}
+
+#endif
+
+/*
+ * Record the range table identifier of one relation so that it can be
+ * cross-checked later on.
+ *
+ * This is a no-op unless USE_ASSERT_CHECKING is defined. The entry is keyed
+ * by the single member of rel->relids together with root->plan_name; if an
+ * entry for that key already exists, we assert that the identifier string
+ * has not changed.
+ */
+static void
+pgpa_ri_checker_save(pgpa_planner_state *pps, PlannerInfo *root,
+                                        RelOptInfo *rel)
+{
+#ifdef USE_ASSERT_CHECKING
+       pgpa_ri_checker_key lookup_key;
+       pgpa_ri_checker *slot;
+       pgpa_identifier ident;
+       const char *ident_text;
+       bool            already_present;
+
+       /* Build the hash key: this relation's RTI plus the current plan name. */
+       lookup_key.rti = bms_singleton_member(rel->relids);
+       lookup_key.plan_name = root->plan_name;
+
+       /* Compute the identifier and render it as a string. */
+       pgpa_compute_identifier_by_rti(root, lookup_key.rti, &ident);
+       ident_text = pgpa_identifier_string(&ident);
+
+       /* Find or create the hash entry, checking any pre-existing value. */
+       slot = pgpa_ri_check_insert(pps->ri_check_hash, lookup_key,
+                                                               &already_present);
+       if (already_present)
+               Assert(strcmp(slot->rid_string, ident_text) == 0);
+       slot->rid_string = ident_text;
+#endif
+}
+
+/*
+ * Cross-check the range table identifiers saved during planning against the
+ * ones derived from the finished PlannedStmt.
+ *
+ * This is a no-op unless USE_ASSERT_CHECKING is defined; any discrepancy is
+ * reported by way of an assertion failure.
+ */
+static void
+pgpa_ri_checker_validate(pgpa_planner_state *pps, PlannedStmt *pstmt)
+{
+#ifdef USE_ASSERT_CHECKING
+       pgpa_identifier *final_ids;
+       pgpa_ri_check_iterator iter;
+       pgpa_ri_checker *saved;
+
+       /* Derive a fresh set of identifiers from the planned statement. */
+       final_ids = pgpa_create_identifiers_for_planned_stmt(pstmt);
+
+       /* Walk through everything recorded during planning and compare. */
+       pgpa_ri_check_start_iterate(pps->ri_check_hash, &iter);
+       for (;;)
+       {
+               const char *plan_name;
+               const char *final_text;
+               int                     subplan_offset = 0;
+               Index           final_rti;
+
+               saved = pgpa_ri_check_iterate(pps->ri_check_hash, &iter);
+               if (saved == NULL)
+                       break;
+
+               /*
+                * Entries without a plan name need no offset. For the others, the
+                * offset has to be looked up in the SubPlanRTInfo list.
+                */
+               plan_name = saved->key.plan_name;
+               if (plan_name != NULL)
+               {
+                       foreach_node(SubPlanRTInfo, rtinfo, pstmt->subrtinfos)
+                       {
+                               /*
+                                * Skip entries for other plan names, and also entries with
+                                * rtinfo->dummy set. In the dummy case, the subquery's range
+                                * table is only partially copied into the final range table:
+                                * per add_rtes_to_flat_rtable, only RTE_RELATION entries and
+                                * RTE_SUBQUERY entries that used to be RTE_RELATION entries
+                                * make it across. Consequently, no single rtoffset maps the
+                                * RTIs seen during planning onto the final rtable.
+                                *
+                                * More elaborate logic could cope with that, by remembering
+                                * the subquery's entire rtable during planning, working out
+                                * which entries would have been copied, and pairing them up.
+                                * That doesn't seem worth the trouble at present, since RTIs
+                                * from such subqueries show up only in the range table and
+                                * not in the plan tree proper, so we can neither generate
+                                * nor accept advice for them.
+                                */
+                               if (strcmp(plan_name, rtinfo->plan_name) != 0 ||
+                                       rtinfo->dummy)
+                                       continue;
+
+                               subplan_offset = rtinfo->rtoffset;
+                               Assert(subplan_offset > 0);
+                               break;
+                       }
+
+                       /*
+                        * Failing to find the plan name is fine: it means a subplan by
+                        * that name was planned but turned out to be a dummy subplan,
+                        * and so is absent from the final plan tree.
+                        */
+                       if (subplan_offset == 0)
+                               continue;
+               }
+
+               /*
+                * The saved RTI predates range-table flattening, so shift it by the
+                * applicable offset to obtain the RTI in the final range table.
+                */
+               final_rti = saved->key.rti + subplan_offset;
+               Assert(final_rti <= list_length(pstmt->rtable));
+
+               /* The identifier string computed now must equal the saved one. */
+               final_text = pgpa_identifier_string(&final_ids[final_rti - 1]);
+               Assert(strcmp(final_text, saved->rid_string) == 0);
+       }
+#endif
+}
diff --git a/contrib/pg_plan_advice/pgpa_planner.h b/contrib/pg_plan_advice/pgpa_planner.h
new file mode 100644 (file)
index 0000000..7d40b91
--- /dev/null
@@ -0,0 +1,17 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_planner.h
+ *       planner hooks
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_planner.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_PLANNER_H
+#define PGPA_PLANNER_H
+
+extern void pgpa_planner_install_hooks(void);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_scan.c b/contrib/pg_plan_advice/pgpa_scan.c
new file mode 100644 (file)
index 0000000..dbd7c99
--- /dev/null
@@ -0,0 +1,278 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_scan.c
+ *       analysis of scans in Plan trees
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_scan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pgpa_scan.h"
+#include "pgpa_walker.h"
+
+#include "nodes/parsenodes.h"
+#include "parser/parsetree.h"
+
+static pgpa_scan *pgpa_make_scan(pgpa_plan_walker_context *walker, Plan *plan,
+                                                                pgpa_scan_strategy strategy,
+                                                                Bitmapset *relids,
+                                                                bool beneath_any_gather);
+
+
+static Bitmapset *filter_out_join_relids(Bitmapset *relids, List *rtable);
+static RTEKind unique_nonjoin_rtekind(Bitmapset *relids, List *rtable);
+
+/*
+ * Build a pgpa_scan object for a Plan node and update the plan walker
+ * context as appropriate. For an Append or MergeAppend node, additionally
+ * build a pgpa_scan for every scan that was folded into this node by
+ * Append/MergeAppend pull-up.
+ *
+ * If there is at least one ElidedNode for this plan node, pass the uppermost
+ * one as elided_node, else pass NULL.
+ *
+ * Set the 'beneath_any_gather' flag if we are underneath a Gather or
+ * Gather Merge node.
+ *
+ * Set the 'within_join_problem' flag if we're inside of a join problem and
+ * not otherwise.
+ *
+ * Returns the new pgpa_scan, or NULL if the plan node has no associated RTIs
+ * (which is an error when 'within_join_problem' is set).
+ */
+pgpa_scan *
+pgpa_build_scan(pgpa_plan_walker_context *walker, Plan *plan,
+                               ElidedNode *elided_node,
+                               bool beneath_any_gather, bool within_join_problem)
+{
+       pgpa_scan_strategy strategy = PGPA_SCAN_ORDINARY;
+       Bitmapset  *relids = NULL;
+       List       *merged_child_sets = NIL;
+
+       if (elided_node != NULL)
+       {
+               /*
+                * When setrefs processing removed an Append or MergeAppend node that
+                * had a single surviving child, we have a partitionwise "scan". It
+                * might in truth be a partitionwise join, but we have no need to
+                * tell the two apart.
+                *
+                * An elided trivial SubqueryScan, on the other hand, is merely an
+                * "ordinary" scan: the planner made no meaningful choice, so the
+                * strategy keeps its default value.
+                */
+               if (elided_node->elided_type == T_Append ||
+                       elided_node->elided_type == T_MergeAppend)
+                       strategy = PGPA_SCAN_PARTITIONWISE;
+
+               /* Join RTIs can be present, but advice never refers to them. */
+               relids = filter_out_join_relids(elided_node->relids,
+                                                                               walker->pstmt->rtable);
+       }
+       else
+       {
+               int                     rti = pgpa_scanrelid(plan);
+
+               if (rti != 0)
+               {
+                       /* The plan node scans exactly one range table entry. */
+                       relids = bms_make_singleton(rti);
+
+                       switch (nodeTag(plan))
+                       {
+                               case T_SeqScan:
+                                       strategy = PGPA_SCAN_SEQ;
+                                       break;
+                               case T_BitmapHeapScan:
+                                       strategy = PGPA_SCAN_BITMAP_HEAP;
+                                       break;
+                               case T_IndexScan:
+                                       strategy = PGPA_SCAN_INDEX;
+                                       break;
+                               case T_IndexOnlyScan:
+                                       strategy = PGPA_SCAN_INDEX_ONLY;
+                                       break;
+                               case T_TidScan:
+                               case T_TidRangeScan:
+                                       strategy = PGPA_SCAN_TID;
+                                       break;
+                               default:
+
+                                       /*
+                                        * Everything else stays an ordinary scan. That includes
+                                        * a ForeignScan of a single relation, for which there is
+                                        * no alternative strategy; the multi-relation case is
+                                        * treated differently further down.
+                                        */
+                                       break;
+                       }
+               }
+               else
+               {
+                       relids = pgpa_relids(plan);
+                       if (relids != NULL)
+                       {
+                               NodeTag         tag = nodeTag(plan);
+
+                               if (tag == T_ForeignScan)
+                               {
+                                       /*
+                                        * A single foreign scan that targets several relations
+                                        * means the join was pushed to the remote side, and the
+                                        * generated advice should say so.
+                                        */
+                                       strategy = PGPA_SCAN_FOREIGN;
+                               }
+                               else if (tag == T_Append || tag == T_MergeAppend)
+                               {
+                                       /*
+                                        * Append and MergeAppend nodes can represent a
+                                        * partitionwise scan of a relation, but when they
+                                        * implement a set operation they are just ordinary
+                                        * scans.
+                                        */
+                                       if (unique_nonjoin_rtekind(relids, walker->pstmt->rtable)
+                                               == RTE_RELATION)
+                                               strategy = PGPA_SCAN_PARTITIONWISE;
+
+                                       if (tag == T_Append)
+                                               merged_child_sets =
+                                                       ((Append *) plan)->child_append_relid_sets;
+                                       else
+                                               merged_child_sets =
+                                                       ((MergeAppend *) plan)->child_append_relid_sets;
+                               }
+
+                               /* Join RTIs can be present, but advice never refers to them. */
+                               relids = filter_out_join_relids(relids,
+                                                                                               walker->pstmt->rtable);
+                       }
+               }
+       }
+
+       /*
+        * For an Append or MergeAppend node that absorbed subordinate Append or
+        * MergeAppend paths, every absorbed path counts as one more scan, so
+        * register each of them too.
+        */
+       foreach_node(Bitmapset, child_relids, merged_child_sets)
+       {
+               Bitmapset  *child_nonjoin_relids;
+
+               child_nonjoin_relids = filter_out_join_relids(child_relids,
+                                                                                                         walker->pstmt->rtable);
+               (void) pgpa_make_scan(walker, plan, strategy,
+                                                         child_nonjoin_relids,
+                                                         beneath_any_gather);
+       }
+
+       /* The normal case: we found RTIs, so this really is a scan. */
+       if (relids != NULL)
+               return pgpa_make_scan(walker, plan, strategy, relids,
+                                                         beneath_any_gather);
+
+       /*
+        * A plan node without any associated RTIs is not a scan. Inside a join
+        * problem that's unexpected, so complain; otherwise just return quietly.
+        */
+       if (within_join_problem)
+               elog(ERROR, "plan node has no RTIs: %d", (int) nodeTag(plan));
+       return NULL;
+}
+
+/*
+ * Allocate one pgpa_scan object, fill it in from the arguments, and record
+ * it in the pgpa_plan_walker_context.
+ */
+static pgpa_scan *
+pgpa_make_scan(pgpa_plan_walker_context *walker, Plan *plan,
+                          pgpa_scan_strategy strategy, Bitmapset *relids,
+                          bool beneath_any_gather)
+{
+       pgpa_scan  *result = palloc(sizeof(pgpa_scan));
+
+       /* Populate the new object. */
+       result->plan = plan;
+       result->strategy = strategy;
+       result->relids = relids;
+       result->beneath_any_gather = beneath_any_gather;
+
+       /* File it under the list for its strategy. */
+       walker->scans[strategy] = lappend(walker->scans[strategy], result);
+
+       /*
+        * Scans that are not beneath a Gather or Gather Merge node are meant to
+        * get NO_GATHER() advice. Partitionwise scans are exempt, since
+        * NO_GATHER() for the child scans is sufficient.
+        */
+       if (beneath_any_gather || strategy == PGPA_SCAN_PARTITIONWISE)
+               return result;
+
+       walker->no_gather_scans = bms_add_members(walker->no_gather_scans,
+                                                                                         relids);
+       return result;
+}
+
+/*
+ * Return the one rtekind shared by all non-join members of 'relids'.
+ *
+ * RTE_JOIN entries are ignored. It is an error if the remaining entries do
+ * not all have the same rtekind, or if there are no remaining entries.
+ */
+static RTEKind
+unique_nonjoin_rtekind(Bitmapset *relids, List *rtable)
+{
+       int                     rti;
+       bool            have_kind = false;
+       RTEKind         rtekind;
+
+       Assert(relids != NULL);
+
+       for (rti = bms_next_member(relids, -1); rti >= 0;
+                rti = bms_next_member(relids, rti))
+       {
+               RangeTblEntry *rte = rt_fetch(rti, rtable);
+
+               /* Joins don't count. */
+               if (rte->rtekind == RTE_JOIN)
+                       continue;
+
+               /* The first non-join entry establishes the expected kind. */
+               if (!have_kind)
+               {
+                       rtekind = rte->rtekind;
+                       have_kind = true;
+                       continue;
+               }
+
+               /* Every later non-join entry must agree with it. */
+               if (rtekind != rte->rtekind)
+                       elog(ERROR, "rtekind mismatch: %d vs. %d",
+                                rtekind, rte->rtekind);
+       }
+
+       if (!have_kind)
+               elog(ERROR, "no non-RTE_JOIN RTEs found");
+
+       return rtekind;
+}
+
+/*
+ * Build and return a new Bitmapset holding every member of 'relids' whose
+ * range table entry is not an RTE_JOIN. The input set is left untouched.
+ */
+static Bitmapset *
+filter_out_join_relids(Bitmapset *relids, List *rtable)
+{
+       Bitmapset  *nonjoin_relids = NULL;
+       int                     rti;
+
+       for (rti = bms_next_member(relids, -1); rti >= 0;
+                rti = bms_next_member(relids, rti))
+       {
+               RangeTblEntry *rte = rt_fetch(rti, rtable);
+
+               /* Drop join RTEs; keep everything else. */
+               if (rte->rtekind == RTE_JOIN)
+                       continue;
+
+               nonjoin_relids = bms_add_member(nonjoin_relids, rti);
+       }
+
+       return nonjoin_relids;
+}
diff --git a/contrib/pg_plan_advice/pgpa_scan.h b/contrib/pg_plan_advice/pgpa_scan.h
new file mode 100644 (file)
index 0000000..90a08b4
--- /dev/null
@@ -0,0 +1,86 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_scan.h
+ *       analysis of scans in Plan trees
+ *
+ * For purposes of this module, a "scan" includes (1) single plan nodes that
+ * scan multiple RTIs, such as a degenerate Result node that replaces what
+ * would otherwise have been a join, and (2) Append and MergeAppend nodes
+ * implementing a partitionwise scan or a partitionwise join. Said
+ * differently, scans are the leaves of the join tree for a single join
+ * problem.
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_scan.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_SCAN_H
+#define PGPA_SCAN_H
+
+#include "nodes/plannodes.h"
+
+typedef struct pgpa_plan_walker_context pgpa_plan_walker_context;
+
+/*
+ * Scan strategies.
+ *
+ * PGPA_SCAN_ORDINARY is any scan strategy that isn't interesting to us
+ * because there is no meaningful planner decision involved. For example,
+ * the only way to scan a subquery is a SubqueryScan, and the only way to
+ * scan a VALUES construct is a ValuesScan. We need not care exactly which
+ * type of planner node was used in such cases, because the same thing will
+ * happen when replanning.
+ *
+ * PGPA_SCAN_ORDINARY also includes Result nodes that correspond to scans
+ * or even joins that are proved empty. We don't know whether or not the scan
+ * or join will still be provably empty at replanning time, but if it is,
+ * then no scan-type advice is needed, and if it's not, we can't recommend
+ * a scan type based on the current plan.
+ *
+ * PGPA_SCAN_PARTITIONWISE also lumps together scans and joins: this can
+ * be either a partitionwise scan of a partitioned table or a partitionwise
+ * join between several partitioned tables. Note that all decisions about
+ * whether or not to use partitionwise join are meaningful: no matter what
+ * we decided this time, we could do more or fewer things partitionwise the
+ * next time.
+ *
+ * PGPA_SCAN_FOREIGN is only used when there's more than one relation involved;
+ * a single-table foreign scan is classified as ordinary, since there is no
+ * decision to make in that case.
+ *
+ * Other scan strategies map one-to-one to plan nodes.
+ */
+typedef enum
+{
+       PGPA_SCAN_ORDINARY = 0,
+       PGPA_SCAN_SEQ,
+       PGPA_SCAN_BITMAP_HEAP,
+       PGPA_SCAN_FOREIGN,
+       PGPA_SCAN_INDEX,
+       PGPA_SCAN_INDEX_ONLY,
+       PGPA_SCAN_PARTITIONWISE,
+       PGPA_SCAN_TID
+       /* update NUM_PGPA_SCAN_STRATEGY if you add anything here */
+} pgpa_scan_strategy;
+
+#define NUM_PGPA_SCAN_STRATEGY ((int) PGPA_SCAN_TID + 1)
+
+/*
+ * All of the details we need regarding a scan.
+ *
+ * 'plan' is the plan node for the scan and 'strategy' classifies it as one
+ * of the pgpa_scan_strategy values. 'relids' is the set of RTIs associated
+ * with the scan; as noted at the top of this file, a single scan can cover
+ * more than one RTI. 'beneath_any_gather' presumably records whether the
+ * scan appears below a Gather or Gather Merge node (NOTE(review): confirm
+ * against pgpa_build_scan, which receives a parameter of the same name).
+ */
+typedef struct pgpa_scan
+{
+       Plan       *plan;
+       pgpa_scan_strategy strategy;
+       Bitmapset  *relids;
+       bool            beneath_any_gather;
+} pgpa_scan;
+
+extern pgpa_scan *pgpa_build_scan(pgpa_plan_walker_context *walker, Plan *plan,
+                                                                 ElidedNode *elided_node,
+                                                                 bool beneath_any_gather,
+                                                                 bool within_join_problem);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_scanner.l b/contrib/pg_plan_advice/pgpa_scanner.l
new file mode 100644 (file)
index 0000000..be7d7ba
--- /dev/null
@@ -0,0 +1,299 @@
+%top{
+/*
+ * Scanner for plan advice
+ *
+ * Copyright (c) 2000-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_scanner.l
+ */
+#include "postgres.h"
+
+#include "common/string.h"
+#include "nodes/miscnodes.h"
+#include "parser/scansup.h"
+
+#include "pgpa_ast.h"
+#include "pgpa_parser.h"
+
+/*
+ * Extra data that we pass around during scanning.
+ *
+ * 'litbuf' is used to implement the <xd> exclusive state, which handles
+ * double-quoted identifiers.
+ */
+typedef struct pgpa_yy_extra_type
+{
+       StringInfoData  litbuf;
+} pgpa_yy_extra_type;
+
+}
+
+%{
+/* LCOV_EXCL_START */
+
+#define YY_DECL \
+       extern int pgpa_yylex(union YYSTYPE *yylval_param, List **result, \
+                                                 char **parse_error_msg_p, yyscan_t yyscanner)
+
+/* No reason to constrain amount of data slurped */
+#define YY_READ_BUF_SIZE 16777216
+
+/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
+#undef fprintf
+#define fprintf(file, fmt, msg)  fprintf_to_ereport(fmt, msg)
+
+/*
+ * Replacement for fprintf() inside this scanner, installed by the macro
+ * above: report 'msg' via ereport(ERROR). 'fmt' is accepted only so that the
+ * argument list lines up with the macro; it is not used.
+ */
+static void
+fprintf_to_ereport(const char *fmt, const char *msg)
+{
+       ereport(ERROR, (errmsg_internal("%s", msg)));
+}
+%}
+
+%option reentrant
+%option bison-bridge
+%option 8bit
+%option never-interactive
+%option nodefault
+%option noinput
+%option nounput
+%option noyywrap
+%option noyyalloc
+%option noyyrealloc
+%option noyyfree
+%option warn
+%option prefix="pgpa_yy"
+%option extra-type="pgpa_yy_extra_type *"
+
+/*
+ * What follows is a severely stripped-down version of the core scanner. We
+ * only care about recognizing identifiers with or without identifier quoting
+ * (i.e. double-quoting), decimal integers, and a small handful of other
+ * things. Keep these rules in sync with src/backend/parser/scan.l. As in that
+ * file, we use an exclusive state called 'xc' for C-style comments, and an
+ * exclusive state called 'xd' for double-quoted identifiers.
+ */
+%x xc
+%x xd
+
+ident_start            [A-Za-z\200-\377_]
+ident_cont             [A-Za-z\200-\377_0-9\$]
+
+identifier             {ident_start}{ident_cont}*
+
+decdigit               [0-9]
+decinteger             {decdigit}(_?{decdigit})*
+
+space                  [ \t\n\r\f\v]
+whitespace             {space}+
+
+dquote                 \"
+xdstart                        {dquote}
+xdstop                 {dquote}
+xddouble               {dquote}{dquote}
+xdinside               [^"]+
+
+xcstart                        \/\*
+xcstop                 \*+\/
+xcinside               [^*/]+
+
+%%
+
+{whitespace}   { /* ignore */ }
+
+{identifier}   {
+                                       bool    not_a_tag;
+                                       pgpa_advice_tag_type    tag;
+
+                                       /*
+                                        * Fold to lower case. Unlike the core scanner, we don't
+                                        * truncate identifiers here; there is no obvious reason
+                                        * to do so.
+                                        */
+                                       yylval->str = downcase_identifier(yytext, yyleng,
+                                                                                                         false, false);
+
+                                       /*
+                                        * Anything that isn't an advice tag is a plain
+                                        * identifier. For a tag, the token type we return
+                                        * depends on how further parsing should proceed.
+                                        */
+                                       tag = pgpa_parse_advice_tag(yylval->str, &not_a_tag);
+                                       if (not_a_tag)
+                                               return TOK_IDENT;
+
+                                       switch (tag)
+                                       {
+                                               case PGPA_TAG_JOIN_ORDER:
+                                                       return TOK_TAG_JOIN_ORDER;
+
+                                               case PGPA_TAG_INDEX_SCAN:
+                                               case PGPA_TAG_INDEX_ONLY_SCAN:
+                                                       return TOK_TAG_INDEX;
+
+                                               case PGPA_TAG_BITMAP_HEAP_SCAN:
+                                                       return TOK_TAG_BITMAP;
+
+                                               case PGPA_TAG_SEQ_SCAN:
+                                               case PGPA_TAG_TID_SCAN:
+                                               case PGPA_TAG_NO_GATHER:
+                                                       return TOK_TAG_SIMPLE;
+
+                                               default:
+                                                       return TOK_TAG_GENERIC;
+                                       }
+                               }
+
+{decinteger}   {
+                                       char   *digits;
+                                       char   *endptr;
+                                       int             ndigits = 0;
+
+                                       /*
+                                        * The decinteger pattern permits single underscores
+                                        * between digits, as in the core scanner, but strtoint()
+                                        * stops at the first one. Without this step a valid
+                                        * literal such as 1_000 would be misreported as out of
+                                        * range, so strip the separators before converting.
+                                        */
+                                       digits = palloc(yyleng + 1);
+                                       for (int i = 0; i < (int) yyleng; i++)
+                                       {
+                                               if (yytext[i] != '_')
+                                                       digits[ndigits++] = yytext[i];
+                                       }
+                                       digits[ndigits] = '\0';
+
+                                       /*
+                                        * On failure we record the error and still return a
+                                        * token; the recorded message is what the caller sees.
+                                        */
+                                       errno = 0;
+                                       yylval->integer = strtoint(digits, &endptr, 10);
+                                       if (*endptr != '\0' || errno == ERANGE)
+                                               pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+                                                                        "integer out of range");
+
+                                       /* endptr points into digits, so free only after the check */
+                                       pfree(digits);
+                                       return TOK_INTEGER;
+                               }
+
+{xcstart}              {
+                                       BEGIN(xc);
+                               }
+
+{xdstart}              {
+                                       BEGIN(xd);
+                                       resetStringInfo(&yyextra->litbuf);
+                               }
+
+"||"                   { return TOK_OR; }
+
+"&&"                   { return TOK_AND; }
+
+.                              { return yytext[0]; }
+
+<xc>{xcstop}   {
+                                       BEGIN(INITIAL);
+                               }
+
+<xc>{xcinside} {
+                                       /* discard multiple characters without slash or asterisk */
+                               }
+
+<xc>.                  {
+                                       /*
+                                        * Discard any single character. flex prefers longer
+                                        * matches, so this rule will never be picked when we could
+                                        * have matched xcstop.
+                                        *
+                                        * NB: At present, we don't bother to support nested
+                                        * C-style comments here, but this logic could be extended
+                                        * if that restriction poses a problem.
+                                        */
+                               }
+
+<xc><<EOF>>            {
+                                       BEGIN(INITIAL);
+                                       pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+                                                                "unterminated comment");
+                               }
+
+<xd>{xdstop}   {
+                                       BEGIN(INITIAL);
+                                       yylval->str = pstrdup(yyextra->litbuf.data);
+                                       return TOK_IDENT;
+                               }
+
+<xd>{xddouble} {
+                                       appendStringInfoChar(&yyextra->litbuf, '"');
+                               }
+
+<xd>{xdinside} {
+                                       appendBinaryStringInfo(&yyextra->litbuf, yytext, yyleng);
+                               }
+
+<xd><<EOF>>            {
+                                       BEGIN(INITIAL);
+                                       pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+                                                                "unterminated quoted identifier");
+                               }
+
+%%
+
+/* LCOV_EXCL_STOP */
+
+/*
+ * Record an error encountered while scanning or parsing advice.
+ *
+ * 'message' is the error text supplied by bison or by one of the scanner
+ * rules; the offending input is available through the 'yytext' macro. The
+ * two are combined into a final message that is stored in *parse_error_msg_p,
+ * where the caller of pgpa_yyparse() can find it. If a message has already
+ * been stored there, this call does nothing.
+ */
+void
+pgpa_yyerror(List **result, char **parse_error_msg_p, yyscan_t yyscanner,
+                        const char *message)
+{
+       /* the yytext macro refers to a variable named 'yyg' */
+       struct yyguts_t *yyg = (struct yyguts_t *) yyscanner;
+
+       /* keep only the first error reported during a parse operation */
+       if (*parse_error_msg_p != NULL)
+               return;
+
+       if (yytext[0] == '\0')
+               *parse_error_msg_p = psprintf("%s at end of input", message);
+       else
+               *parse_error_msg_p = psprintf("%s at or near \"%s\"", message, yytext);
+}
+
+/*
+ * Set up the advice scanner to read from the string 'str'.
+ *
+ * Call this before parsing begins. The new scanner handle is returned via
+ * *yyscannerp; it carries a pgpa_yy_extra_type whose 'litbuf' is ready for
+ * use by the double-quoted identifier rules.
+ */
+void
+pgpa_scanner_init(const char *str, yyscan_t *yyscannerp)
+{
+       pgpa_yy_extra_type *extra;
+
+       if (yylex_init(yyscannerp) != 0)
+               elog(ERROR, "yylex_init() failed: %m");
+
+       /* attach our per-scanner state */
+       extra = palloc0_object(pgpa_yy_extra_type);
+       initStringInfo(&extra->litbuf);
+       pgpa_yyset_extra(extra, *yyscannerp);
+
+       yy_scan_string(str, *yyscannerp);
+}
+
+
+/*
+ * Shut down the advice scanner.
+ *
+ * This should be called after parsing is complete. It only calls
+ * yylex_destroy(); the pgpa_yy_extra_type allocated by pgpa_scanner_init()
+ * and its litbuf are not explicitly freed here, so they presumably go away
+ * with the memory context they were allocated in.
+ */
+void
+pgpa_scanner_finish(yyscan_t yyscanner)
+{
+       yylex_destroy(yyscanner);
+}
+
+/*
+ * Memory management hooks that make flex allocate with palloc() instead of
+ * malloc(). It'd be better to make these static, but flex insists otherwise.
+ */
+
+void *
+yyalloc(yy_size_t size, yyscan_t yyscanner)
+{
+       return palloc(size);
+}
+
+void *
+yyrealloc(void *ptr, yy_size_t size, yyscan_t yyscanner)
+{
+       /* a NULL pointer means there is nothing to resize: allocate afresh */
+       return (ptr != NULL) ? repalloc(ptr, size) : palloc(size);
+}
+
+void
+yyfree(void *ptr, yyscan_t yyscanner)
+{
+       /* tolerate NULL, which must not be handed to pfree() */
+       if (ptr != NULL)
+               pfree(ptr);
+}
diff --git a/contrib/pg_plan_advice/pgpa_trove.c b/contrib/pg_plan_advice/pgpa_trove.c
new file mode 100644 (file)
index 0000000..a92121f
--- /dev/null
@@ -0,0 +1,490 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_trove.c
+ *       All of the advice given for a particular query, appropriately
+ *    organized for convenient access.
+ *
+ * This name comes from the English expression "trove of advice", which
+ * means a collection of wisdom. This slightly unusual term is chosen to
+ * avoid naming confusion; for example, "collection of advice" would
+ * invite confusion with pgpa_collector.c. Note that, while we don't know
+ * whether the provided advice is actually wise, it's not our job to
+ * question the user's choices.
+ *
+ * The goal of this module is to make it easy to locate the specific
+ * bits of advice that pertain to any given part of a query, or to
+ * determine that there are none.
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_trove.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pgpa_trove.h"
+
+#include "common/hashfn_unstable.h"
+
+/*
+ * An advice trove is organized into a series of "slices", each of which
+ * contains information about one topic e.g. scan methods. Each slice consists
+ * of an array of trove entries plus a hash table that we can use to determine
+ * which ones are relevant to a particular part of the query.
+ *
+ * 'entries' is the array, 'nallocated' is its allocated length, and 'nused'
+ * is the number of slots filled so far; new entries are always appended at
+ * offset 'nused'. 'hash' maps a relation identifier key to the set of
+ * offsets in 'entries' whose targets mention that key.
+ */
+typedef struct pgpa_trove_slice
+{
+       unsigned        nallocated;
+       unsigned        nused;
+       pgpa_trove_entry *entries;
+       struct pgpa_trove_entry_hash *hash;
+} pgpa_trove_slice;
+
+/*
+ * Scan advice is stored into 'scan'; join advice is stored into 'join'; and
+ * advice that can apply to both cases is stored into 'rel'. This lets callers
+ * ask just for what's relevant. These slices correspond to the possible values
+ * of pgpa_trove_lookup_type.
+ */
+struct pgpa_trove
+{
+       pgpa_trove_slice join;
+       pgpa_trove_slice rel;
+       pgpa_trove_slice scan;
+};
+
+/*
+ * We're going to build a hash table to allow clients of this module to find
+ * relevant advice for a given part of the query quickly. However, we're going
+ * to use only three of the five key fields as hash keys. There are two reasons
+ * for this.
+ *
+ * First, it's allowable to set partition_schema to NULL to match a partition
+ * with the correct name in any schema.
+ *
+ * Second, we expect the "occurrence" and "partition_schema" portions of the
+ * relation identifiers to be mostly uninteresting. Most of the time, the
+ * occurrence field will be 1 and the partition_schema values will all be the
+ * same. Even when there is some variation, the absolute number of entries
+ * that have the same values for all three of these key fields should be
+ * quite small.
+ */
+typedef struct
+{
+       const char *alias_name;
+       const char *partition_name;
+       const char *plan_name;
+} pgpa_trove_entry_key;
+
+/*
+ * One element of a slice's hash table.
+ *
+ * 'indexes' is the set of offsets, within the owning slice's entries array,
+ * of the trove entries that mention 'key'. 'status' is the bookkeeping
+ * member that lib/simplehash.h requires every element type to have.
+ */
+typedef struct
+{
+       pgpa_trove_entry_key key;
+       int                     status;
+       Bitmapset  *indexes;
+} pgpa_trove_entry_element;
+
+static uint32 pgpa_trove_entry_hash_key(pgpa_trove_entry_key key);
+
+/*
+ * Test two hash keys for equality.
+ *
+ * alias_name is compared with strcmp() and so must not be NULL. The other
+ * two fields may be NULL, and are considered equal when both are NULL.
+ */
+static inline bool
+pgpa_trove_entry_compare_key(pgpa_trove_entry_key a, pgpa_trove_entry_key b)
+{
+       return strcmp(a.alias_name, b.alias_name) == 0 &&
+               strings_equal_or_both_null(a.partition_name, b.partition_name) &&
+               strings_equal_or_both_null(a.plan_name, b.plan_name);
+}
+
+#define SH_PREFIX                      pgpa_trove_entry
+#define SH_ELEMENT_TYPE                pgpa_trove_entry_element
+#define SH_KEY_TYPE                    pgpa_trove_entry_key
+#define SH_KEY                         key
+#define SH_HASH_KEY(tb, key)   pgpa_trove_entry_hash_key(key)
+#define        SH_EQUAL(tb, a, b)      pgpa_trove_entry_compare_key(a, b)
+#define SH_SCOPE                       static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+static void pgpa_init_trove_slice(pgpa_trove_slice *tslice);
+static void pgpa_trove_add_to_slice(pgpa_trove_slice *tslice,
+                                                                       pgpa_advice_tag_type tag,
+                                                                       pgpa_advice_target *target);
+static void pgpa_trove_add_to_hash(pgpa_trove_entry_hash *hash,
+                                                                  pgpa_advice_target *target,
+                                                                  int index);
+static Bitmapset *pgpa_trove_slice_lookup(pgpa_trove_slice *tslice,
+                                                                                 pgpa_identifier *rid);
+
+/*
+ * Build a trove of advice from a list of advice items.
+ *
+ * Caller can obtain a list of advice items to pass to this function by
+ * calling pgpa_parse().
+ *
+ * Each item's targets are filed into the slice that matches the item's tag:
+ * scan-method advice goes into 'scan', join-method and JOIN_ORDER advice go
+ * into 'join', and advice that can apply to either scans or joins goes into
+ * 'rel'.
+ */
+pgpa_trove *
+pgpa_build_trove(List *advice_items)
+{
+       pgpa_trove *trove = palloc_object(pgpa_trove);
+
+       pgpa_init_trove_slice(&trove->join);
+       pgpa_init_trove_slice(&trove->rel);
+       pgpa_init_trove_slice(&trove->scan);
+
+       foreach_ptr(pgpa_advice_item, item, advice_items)
+       {
+               /* slice that receives one entry per target; NULL if none does */
+               pgpa_trove_slice *tslice = NULL;
+
+               switch (item->tag)
+               {
+                       case PGPA_TAG_JOIN_ORDER:
+                               {
+                                       pgpa_advice_target *surrogate;
+
+                                       /*
+                                        * For most advice types, each element in the top-level
+                                        * list is a separate target, but it's most convenient to
+                                        * regard the entirety of a JOIN_ORDER specification as a
+                                        * single target. Since it wasn't represented that way
+                                        * during parsing, build a surrogate object now and add
+                                        * that instead of the individual targets.
+                                        */
+                                       surrogate = palloc0_object(pgpa_advice_target);
+                                       surrogate->ttype = PGPA_TARGET_ORDERED_LIST;
+                                       surrogate->children = item->targets;
+
+                                       pgpa_trove_add_to_slice(&trove->join,
+                                                                                       item->tag, surrogate);
+                               }
+                               break;
+
+                       case PGPA_TAG_BITMAP_HEAP_SCAN:
+                       case PGPA_TAG_INDEX_ONLY_SCAN:
+                       case PGPA_TAG_INDEX_SCAN:
+                       case PGPA_TAG_SEQ_SCAN:
+                       case PGPA_TAG_TID_SCAN:
+                               /* Scan advice. */
+                               tslice = &trove->scan;
+                               break;
+
+                       case PGPA_TAG_FOREIGN_JOIN:
+                       case PGPA_TAG_HASH_JOIN:
+                       case PGPA_TAG_MERGE_JOIN_MATERIALIZE:
+                       case PGPA_TAG_MERGE_JOIN_PLAIN:
+                       case PGPA_TAG_NESTED_LOOP_MATERIALIZE:
+                       case PGPA_TAG_NESTED_LOOP_MEMOIZE:
+                       case PGPA_TAG_NESTED_LOOP_PLAIN:
+                       case PGPA_TAG_SEMIJOIN_NON_UNIQUE:
+                       case PGPA_TAG_SEMIJOIN_UNIQUE:
+                               /* Join strategy advice. */
+                               tslice = &trove->join;
+                               break;
+
+                       case PGPA_TAG_PARTITIONWISE:
+                       case PGPA_TAG_GATHER:
+                       case PGPA_TAG_GATHER_MERGE:
+                       case PGPA_TAG_NO_GATHER:
+                               /* Advice about a RelOptInfo relevant to both scans and joins. */
+                               tslice = &trove->rel;
+                               break;
+               }
+
+               if (tslice != NULL)
+               {
+                       foreach_ptr(pgpa_advice_target, target, item->targets)
+                       {
+                               /*
+                                * For now, all of our scan types target single relations,
+                                * but in the future this might not be true, e.g. a custom
+                                * scan could replace a join.
+                                */
+                               Assert(tslice != &trove->scan ||
+                                          target->ttype == PGPA_TARGET_IDENTIFIER);
+
+                               pgpa_trove_add_to_slice(tslice, item->tag, target);
+                       }
+               }
+       }
+
+       return trove;
+}
+
+/*
+ * Search a trove of advice for relevant entries.
+ *
+ * 'type' selects which slice of the trove to search, and 'rids' is an array
+ * of 'nrids' relation identifiers (at least one) that must all belong to the
+ * same subquery. All parameters are input parameters except for *result,
+ * which is an output parameter used to return results to the caller:
+ * result->entries is the selected slice's entry array, and result->indexes
+ * is the set of offsets into that array of entries matching any of the
+ * identifiers (NULL if there are none).
+ */
+void
+pgpa_trove_lookup(pgpa_trove *trove, pgpa_trove_lookup_type type,
+                                 int nrids, pgpa_identifier *rids, pgpa_trove_result *result)
+{
+       pgpa_trove_slice *tslice;
+       Bitmapset  *indexes;
+
+       Assert(nrids > 0);
+
+       if (type == PGPA_TROVE_LOOKUP_SCAN)
+               tslice = &trove->scan;
+       else if (type == PGPA_TROVE_LOOKUP_JOIN)
+               tslice = &trove->join;
+       else
+               tslice = &trove->rel;
+
+       indexes = pgpa_trove_slice_lookup(tslice, &rids[0]);
+       for (int i = 1; i < nrids; ++i)
+       {
+               /*
+                * If the caller is asking about two relations that aren't part of the
+                * same subquery, they've messed up.
+                */
+               Assert(strings_equal_or_both_null(rids[0].plan_name,
+                                                                                 rids[i].plan_name));
+
+               /*
+                * Each slice lookup hands back a newly built set that nothing else
+                * references, so merge destructively with bms_join() rather than
+                * making yet another copy with bms_union() and abandoning both
+                * inputs on every iteration.
+                */
+               indexes = bms_join(indexes,
+                                                  pgpa_trove_slice_lookup(tslice, &rids[i]));
+       }
+
+       result->entries = tslice->entries;
+       result->indexes = indexes;
+}
+
+/*
+ * Hand back every entry in one slice of a trove.
+ *
+ * 'trove' and 'type' are inputs; 'type' selects the slice. On return,
+ * *entries points to the slice's entry array and *nentries is the number of
+ * entries in use.
+ */
+void
+pgpa_trove_lookup_all(pgpa_trove *trove, pgpa_trove_lookup_type type,
+                                         pgpa_trove_entry **entries, int *nentries)
+{
+       pgpa_trove_slice *tslice;
+
+       switch (type)
+       {
+               case PGPA_TROVE_LOOKUP_SCAN:
+                       tslice = &trove->scan;
+                       break;
+               case PGPA_TROVE_LOOKUP_JOIN:
+                       tslice = &trove->join;
+                       break;
+               default:
+                       tslice = &trove->rel;
+                       break;
+       }
+
+       *nentries = tslice->nused;
+       *entries = tslice->entries;
+}
+
+/*
+ * Render a trove entry as an item of plan advice that would produce it.
+ *
+ * The result is a newly allocated string: the tag name followed by the
+ * formatted target and, if the target has one, its index target.
+ */
+char *
+pgpa_cstring_trove_entry(pgpa_trove_entry *entry)
+{
+       StringInfoData buf;
+       bool            add_parens = (entry->tag != PGPA_TAG_JOIN_ORDER);
+
+       initStringInfo(&buf);
+       appendStringInfoString(&buf, pgpa_cstring_advice_tag(entry->tag));
+
+       /*
+        * pgpa_build_trove wraps a JOIN_ORDER item's targets in a single
+        * ordered-list target, so for that tag we don't add parentheses of our
+        * own here.
+        */
+       if (add_parens)
+               appendStringInfoChar(&buf, '(');
+       else
+               Assert(entry->target->ttype == PGPA_TARGET_ORDERED_LIST);
+
+       pgpa_format_advice_target(&buf, entry->target);
+
+       if (entry->target->itarget != NULL)
+       {
+               appendStringInfoChar(&buf, ' ');
+               pgpa_format_index_target(&buf, entry->target->itarget);
+       }
+
+       if (add_parens)
+               appendStringInfoChar(&buf, ')');
+
+       return buf.data;
+}
+
+/*
+ * OR the given PGPA_TE_* flags into each trove entry whose offset within
+ * 'entries' is a member of 'indexes'.
+ */
+void
+pgpa_trove_set_flags(pgpa_trove_entry *entries, Bitmapset *indexes, int flags)
+{
+       for (int i = bms_next_member(indexes, -1); i >= 0;
+                i = bms_next_member(indexes, i))
+               entries[i].flags |= flags;
+}
+
+/*
+ * Append a new advice target to an existing pgpa_trove_slice object.
+ *
+ * The entry array is doubled in size when full. The new entry records 'tag'
+ * and 'target' with no flags set, and is also registered in the slice's hash
+ * table.
+ */
+static void
+pgpa_trove_add_to_slice(pgpa_trove_slice *tslice,
+                                               pgpa_advice_tag_type tag,
+                                               pgpa_advice_target *target)
+{
+       unsigned        slot = tslice->nused;
+       pgpa_trove_entry *entry;
+
+       /* Make room if every allocated slot is already in use. */
+       if (slot >= tslice->nallocated)
+       {
+               int                     new_allocated = tslice->nallocated * 2;
+
+               tslice->entries = repalloc_array(tslice->entries, pgpa_trove_entry,
+                                                                                new_allocated);
+               tslice->nallocated = new_allocated;
+       }
+
+       /* Fill in the next free slot. */
+       entry = &tslice->entries[slot];
+       entry->flags = 0;
+       entry->tag = tag;
+       entry->target = target;
+
+       /* Make the entry findable by the identifiers its target mentions. */
+       pgpa_trove_add_to_hash(tslice->hash, target, slot);
+
+       tslice->nused = slot + 1;
+}
+
+/*
+ * Update the hash table for a newly-added advice target.
+ *
+ * 'index' is the offset of the trove entry within its slice's entry array.
+ * For an identifier target, 'index' is added to the set stored under that
+ * identifier's (alias_name, partition_name, plan_name) key. For any other
+ * kind of target, we recurse so that 'index' is recorded under every
+ * identifier that appears anywhere beneath it.
+ */
+static void
+pgpa_trove_add_to_hash(pgpa_trove_entry_hash *hash, pgpa_advice_target *target,
+                                          int index)
+{
+       pgpa_trove_entry_key key;
+       pgpa_trove_entry_element *element;
+       bool            found;
+
+       /* For non-identifiers, add entries for all descendents. */
+       if (target->ttype != PGPA_TARGET_IDENTIFIER)
+       {
+               foreach_ptr(pgpa_advice_target, child_target, target->children)
+               {
+                       pgpa_trove_add_to_hash(hash, child_target, index);
+               }
+               return;
+       }
+
+       /* Sanity checks. */
+       Assert(target->rid.occurrence > 0);
+       Assert(target->rid.alias_name != NULL);
+
+       /* Add an entry for this relation identifier. */
+       key.alias_name = target->rid.alias_name;
+       key.partition_name = target->rid.partrel;
+       key.plan_name = target->rid.plan_name;
+       element = pgpa_trove_entry_insert(hash, key, &found);
+
+       /*
+        * simplehash initializes only the key and status of a newly inserted
+        * element. The rest of the slot may still hold whatever a previous
+        * occupant left there (elements get shifted around on insertion), so we
+        * must start from an empty set ourselves rather than risk sharing or
+        * scribbling on another key's Bitmapset.
+        */
+       if (!found)
+               element->indexes = NULL;
+
+       element->indexes = bms_add_member(element->indexes, index);
+}
+
+/*
+ * Initialize a pgpa_trove_slice object to the empty state, allocating its
+ * entry array and hash table in the current memory context.
+ */
+static void
+pgpa_init_trove_slice(pgpa_trove_slice *tslice)
+{
+       /*
+        * Ideally the starting size would be big enough for the array and hash
+        * table to hold every advice item that ends up in this slice. However,
+        * a generous default is bad for performance, because setting up a slice
+        * means zeroing an amount of memory proportional to its size. Advice
+        * strings will often be relatively short, so start out quite small.
+        */
+       const unsigned initial_size = 16;
+
+       tslice->nused = 0;
+       tslice->nallocated = initial_size;
+       tslice->entries = palloc_array(pgpa_trove_entry, initial_size);
+       tslice->hash = pgpa_trove_entry_create(CurrentMemoryContext,
+                                                                                  initial_size, NULL);
+}
+
+/*
+ * Compute a 32-bit hash of a key made up of alias_name, partition_name,
+ * and plan_name, using the fasthash functions.
+ */
+static uint32
+pgpa_trove_entry_hash_key(pgpa_trove_entry_key key)
+{
+       /* the two fields that are allowed to be NULL, in hashing order */
+       const char *nullable[] = {key.partition_name, key.plan_name};
+       fasthash_state hs;
+       int                     total_len;
+
+       fasthash_init(&hs, 0);
+
+       /* alias_name may not be NULL, so it is always hashed */
+       total_len = fasthash_accum_cstring(&hs, key.alias_name);
+
+       /* NULL fields contribute nothing to the hash */
+       for (int i = 0; i < lengthof(nullable); i++)
+       {
+               if (nullable[i] != NULL)
+                       total_len += fasthash_accum_cstring(&hs, nullable[i]);
+       }
+
+       /*
+        * hashfn_unstable.h recommends using string length as tweak. It's not
+        * clear what to do when there are multiple strings, so for now we just
+        * use the total of all of the lengths.
+        */
+       return fasthash_final32(&hs, total_len);
+}
+
+/*
+ * Find the entries in a trove slice that match a relation identifier.
+ *
+ * Returns a newly built set of offsets into tslice->entries, or NULL if
+ * nothing matches.
+ */
+static Bitmapset *
+pgpa_trove_slice_lookup(pgpa_trove_slice *tslice, pgpa_identifier *rid)
+{
+       pgpa_trove_entry_key key;
+       pgpa_trove_entry_element *element;
+       Bitmapset  *matches = NULL;
+
+       Assert(rid->occurrence >= 1);
+
+       key.alias_name = rid->alias_name;
+       key.partition_name = rid->partrel;
+       key.plan_name = rid->plan_name;
+
+       /* No hash element means no entry mentions these key fields at all. */
+       element = pgpa_trove_entry_lookup(tslice->hash, key);
+       if (element == NULL)
+               return NULL;
+
+       for (int i = bms_next_member(element->indexes, -1); i >= 0;
+                i = bms_next_member(element->indexes, i))
+       {
+               /*
+                * We know that this target or one of its descendents matches the
+                * identifier on the three key fields above, but we don't know
+                * which descendent or whether the occurrence and schema also
+                * match. Let pgpa_identifier_matches_target() decide.
+                */
+               if (pgpa_identifier_matches_target(rid, tslice->entries[i].target))
+                       matches = bms_add_member(matches, i);
+       }
+
+       return matches;
+}
diff --git a/contrib/pg_plan_advice/pgpa_trove.h b/contrib/pg_plan_advice/pgpa_trove.h
new file mode 100644 (file)
index 0000000..479c3f7
--- /dev/null
@@ -0,0 +1,113 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_trove.h
+ *       All of the advice given for a particular query, appropriately
+ *    organized for convenient access.
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_trove.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_TROVE_H
+#define PGPA_TROVE_H
+
+#include "pgpa_ast.h"
+
+#include "nodes/bitmapset.h"
+
+typedef struct pgpa_trove pgpa_trove;
+
+/*
+ * Outcome flags for a pgpa_trove_entry, recording what happened when we
+ * tried to plan using that piece of advice.
+ *
+ * PGPA_TE_MATCH_PARTIAL: some part of the query at least partially matched
+ * the target. For JOIN_ORDER(a b), any joinrel that includes either "a" or
+ * "b" is enough to set this.
+ *
+ * PGPA_TE_MATCH_FULL: the target was matched exactly. For JOIN_ORDER(a b),
+ * this requires a joinrel containing "a" and "b" and nothing else.
+ *
+ * PGPA_TE_INAPPLICABLE: the advice does not properly apply to its target,
+ * as with INDEX_SCAN(foo bar_idx) when bar_idx does not exist on foo. Note
+ * that this bit being set does not imply that the advice had no effect.
+ *
+ * PGPA_TE_CONFLICTING: what this advice wants is at odds with what some
+ * other plan advice wants. For example, JOIN_ORDER(a b) requires "a" to be
+ * the outer table, whereas HASH_JOIN(a) requires it to be the inner table.
+ *
+ * PGPA_TE_FAILED: the resulting plan did not conform to the advice.
+ */
+#define PGPA_TE_MATCH_PARTIAL          (1 << 0)
+#define PGPA_TE_MATCH_FULL             (1 << 1)
+#define PGPA_TE_INAPPLICABLE           (1 << 2)
+#define PGPA_TE_CONFLICTING            (1 << 3)
+#define PGPA_TE_FAILED                 (1 << 4)
+
+/*
+ * Each entry in a trove of advice represents the application of a tag to
+ * a single target.
+ */
+typedef struct pgpa_trove_entry
+{
+       pgpa_advice_tag_type tag;       /* the advice tag being applied */
+       pgpa_advice_target *target;     /* the target the tag is applied to */
+       int                     flags;  /* PGPA_TE_* flag bits set so far */
+} pgpa_trove_entry;
+
+/*
+ * What kind of information does the caller want to find in a trove?
+ *
+ * PGPA_TROVE_LOOKUP_SCAN means we're looking for scan advice.
+ *
+ * PGPA_TROVE_LOOKUP_JOIN means we're looking for join-related advice.
+ * This includes join order advice, join method advice, and semijoin-uniqueness
+ * advice.
+ *
+ * PGPA_TROVE_LOOKUP_REL means we're looking for general advice about a
+ * RelOptInfo that may correspond to either a scan or a join. This includes
+ * gather-related advice and partitionwise advice. Note that partitionwise
+ * advice might seem like join advice, but that's not a helpful way of viewing
+ * the matter because (1) partitionwise advice is also relevant at the scan
+ * level and (2) other types of join advice affect only what to do from
+ * join_path_setup_hook, but partitionwise advice affects what to do in
+ * joinrel_setup_hook.
+ */
+typedef enum pgpa_trove_lookup_type
+{
+       PGPA_TROVE_LOOKUP_JOIN,
+       PGPA_TROVE_LOOKUP_REL,
+       PGPA_TROVE_LOOKUP_SCAN
+} pgpa_trove_lookup_type;
+
+/*
+ * This struct is used to store the result of a trove lookup. For each member
+ * of "indexes", the entry at the corresponding offset within "entries" is one
+ * of the results.
+ */
+typedef struct pgpa_trove_result
+{
+       pgpa_trove_entry *entries;      /* array that "indexes" refers into */
+       Bitmapset  *indexes;            /* offsets within "entries" of the results */
+} pgpa_trove_result;
+
+/*
+ * Functions exported for working with troves.
+ *
+ * NOTE(review): judging by the signatures alone, pgpa_trove_lookup() reports
+ * its findings through "result", and pgpa_trove_lookup_all() through
+ * "entries" and "nentries"; confirm against the definitions.
+ */
+extern pgpa_trove *pgpa_build_trove(List *advice_items);
+extern void pgpa_trove_lookup(pgpa_trove *trove,
+                                                         pgpa_trove_lookup_type type,
+                                                         int nrids,
+                                                         pgpa_identifier *rids,
+                                                         pgpa_trove_result *result);
+extern void pgpa_trove_lookup_all(pgpa_trove *trove,
+                                                                 pgpa_trove_lookup_type type,
+                                                                 pgpa_trove_entry **entries,
+                                                                 int *nentries);
+extern char *pgpa_cstring_trove_entry(pgpa_trove_entry *entry);
+extern void pgpa_trove_set_flags(pgpa_trove_entry *entries,
+                                                                Bitmapset *indexes, int flags);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_walker.c b/contrib/pg_plan_advice/pgpa_walker.c
new file mode 100644 (file)
index 0000000..7e4e388
--- /dev/null
@@ -0,0 +1,862 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_walker.c
+ *       Plan tree iteration
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_walker.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pgpa_join.h"
+#include "pgpa_scan.h"
+#include "pgpa_walker.h"
+
+#include "nodes/plannodes.h"
+
+/* Plan tree traversal. */
+static void pgpa_walk_recursively(pgpa_plan_walker_context *walker, Plan *plan,
+                                                                 bool within_join_problem,
+                                                                 pgpa_join_unroller *join_unroller,
+                                                                 List *active_query_features,
+                                                                 bool beneath_any_gather);
+static Bitmapset *pgpa_process_unrolled_join(pgpa_plan_walker_context *walker,
+                                                                                        pgpa_unrolled_join *ujoin);
+
+/* Creation of query features. */
+static pgpa_query_feature *pgpa_add_feature(pgpa_plan_walker_context *walker,
+                                                                                       pgpa_qf_type type,
+                                                                                       Plan *plan);
+
+/* Adding RTIs to the currently-active query features. */
+static void pgpa_qf_add_rti(List *active_query_features, Index rti);
+static void pgpa_qf_add_rtis(List *active_query_features, Bitmapset *relids);
+static void pgpa_qf_add_plan_rtis(List *active_query_features, Plan *plan);
+
+/*
+ * Helpers for checking whether a plan conforms to advice; see
+ * pgpa_walker_would_advise(). (Presumably some of these are reached only
+ * indirectly, via the other helpers.)
+ */
+static bool pgpa_walker_join_order_matches(pgpa_unrolled_join *ujoin,
+                                                                                  Index rtable_length,
+                                                                                  pgpa_identifier *rt_identifiers,
+                                                                                  pgpa_advice_target *target,
+                                                                                  bool toplevel);
+static bool pgpa_walker_join_order_matches_member(pgpa_join_member *member,
+                                                                                                 Index rtable_length,
+                                                                                                 pgpa_identifier *rt_identifiers,
+                                                                                                 pgpa_advice_target *target);
+static bool pgpa_walker_contains_scan(pgpa_plan_walker_context *walker,
+                                                                         pgpa_scan_strategy strategy,
+                                                                         Bitmapset *relids);
+static bool pgpa_walker_contains_feature(pgpa_plan_walker_context *walker,
+                                                                                pgpa_qf_type type,
+                                                                                Bitmapset *relids);
+static bool pgpa_walker_contains_join(pgpa_plan_walker_context *walker,
+                                                                         pgpa_join_strategy strategy,
+                                                                         Bitmapset *relids);
+static bool pgpa_walker_contains_no_gather(pgpa_plan_walker_context *walker,
+                                                                                  Bitmapset *relids);
+static Index pgpa_walker_get_rti(Index rtable_length,
+                                                                pgpa_identifier *rt_identifiers,
+                                                                pgpa_identifier *rid);
+
+/*
+ * Entry point for walking all of the Plan trees in a PlannedStmt.
+ *
+ * The walker is zeroed and pointed at pstmt, and then populated by walking
+ * first the main plan tree and then each non-NULL entry in pstmt->subplans.
+ */
+void
+pgpa_plan_walker(pgpa_plan_walker_context *walker, PlannedStmt *pstmt)
+{
+       /* Start from a clean slate. */
+       memset(walker, 0, sizeof(*walker));
+       walker->pstmt = pstmt;
+
+       /* The main plan tree comes first. */
+       pgpa_walk_recursively(walker, pstmt->planTree, false, NULL, NIL, false);
+
+       /*
+        * Subplans are not reached by the walk of the main plan tree, so each
+        * one gets its own walk. NULL entries in the list are skipped.
+        */
+       foreach_ptr(Plan, subplan, pstmt->subplans)
+       {
+               if (subplan == NULL)
+                       continue;
+               pgpa_walk_recursively(walker, subplan, false, NULL, NIL, false);
+       }
+}
+
+/*
+ * Main workhorse for the plan tree walk.
+ *
+ * If within_join_problem is true, we encountered a join at some higher level
+ * of the tree walk and haven't yet descended out of the portion of the plan
+ * tree that is part of that same join problem. We're no longer in the same
+ * join problem if (1) we cross into a different subquery or (2) we descend
+ * through an Append or MergeAppend node, below which any further joins would
+ * be partitionwise joins planned separately from the outer join problem.
+ *
+ * If join_unroller != NULL, the join unroller code expects us to find a join
+ * that should be unrolled into that object. This implies that we're within a
+ * join problem, but the reverse is not true: when we've traversed all the
+ * joins but are still looking for the scan that is the leaf of the join tree,
+ * join_unroller will be NULL but within_join_problem will be true.
+ *
+ * Each element of active_query_features corresponds to some item of advice
+ * that needs to enumerate all the relations it affects. We add RTIs we find
+ * during tree traversal to each of these query features.
+ *
+ * If beneath_any_gather == true, some higher level of the tree traversal found
+ * a Gather or Gather Merge node.
+ */
+static void
+pgpa_walk_recursively(pgpa_plan_walker_context *walker, Plan *plan,
+                                         bool within_join_problem,
+                                         pgpa_join_unroller *join_unroller,
+                                         List *active_query_features,
+                                         bool beneath_any_gather)
+{
+       pgpa_join_unroller *outer_join_unroller = NULL;
+       pgpa_join_unroller *inner_join_unroller = NULL;
+       bool            join_unroller_toplevel = false;
+       List       *pushdown_query_features = NIL;
+       ListCell   *lc;
+       List       *extraplans = NIL;
+       List       *elided_nodes = NIL;
+
+       Assert(within_join_problem || join_unroller == NULL);
+
+       /*
+        * If this is a Gather or Gather Merge node, directly add it to the list
+        * of currently-active query features.
+        *
+        * Otherwise, check the future_query_features list to see whether this was
+        * previously identified as a plan node that needs to be treated as a
+        * query feature.
+        *
+        * Note that the caller also holds a pointer to active_query_features, so
+        * we can't destructively modify it without making a copy.
+        */
+       if (IsA(plan, Gather))
+       {
+               active_query_features =
+                       lappend(list_copy(active_query_features),
+                                       pgpa_add_feature(walker, PGPAQF_GATHER, plan));
+               beneath_any_gather = true;
+       }
+       else if (IsA(plan, GatherMerge))
+       {
+               active_query_features =
+                       lappend(list_copy(active_query_features),
+                                       pgpa_add_feature(walker, PGPAQF_GATHER_MERGE, plan));
+               beneath_any_gather = true;
+       }
+       else
+       {
+               foreach_ptr(pgpa_query_feature, qf, walker->future_query_features)
+               {
+                       if (qf->plan == plan)
+                       {
+                               active_query_features = list_copy(active_query_features);
+                               active_query_features = lappend(active_query_features, qf);
+
+                               /*
+                                * future_query_features holds pgpa_query_feature pointers,
+                                * so the feature itself (not the Plan) is what must be
+                                * removed. Modifying the list inside the loop is safe only
+                                * because we break out immediately afterward.
+                                */
+                               walker->future_query_features =
+                                       list_delete_ptr(walker->future_query_features, qf);
+                               break;
+                       }
+               }
+       }
+
+       /*
+        * Find all elided nodes for this Plan node.
+        */
+       foreach_node(ElidedNode, n, walker->pstmt->elidedNodes)
+       {
+               if (n->plan_node_id == plan->plan_node_id)
+                       elided_nodes = lappend(elided_nodes, n);
+       }
+
+       /* If we found any elided_nodes, handle them. */
+       if (elided_nodes != NIL)
+       {
+               int                     num_elided_nodes = list_length(elided_nodes);
+               ElidedNode *last_elided_node;
+
+               /*
+                * RTIs for the final -- and thus logically uppermost -- elided node
+                * should be collected for query features passed down by the caller.
+                * However, elided nodes act as barriers to query features, which
+                * means that (1) the remaining elided nodes, if any, should be
+                * ignored for purposes of query features and (2) the list of active
+                * query features should be reset to empty so that we do not add RTIs
+                * from the plan node that is logically beneath the elided node to the
+                * query features passed down from the caller.
+                */
+               last_elided_node = list_nth(elided_nodes, num_elided_nodes - 1);
+               pgpa_qf_add_rtis(active_query_features, last_elided_node->relids);
+               active_query_features = NIL;
+
+               /*
+                * If we're within a join problem, the join_unroller is responsible
+                * for building the scan for the final elided node, so throw it out.
+                */
+               if (within_join_problem)
+                       elided_nodes = list_truncate(elided_nodes, num_elided_nodes - 1);
+
+               /* Build scans for all (or the remaining) elided nodes. */
+               foreach_node(ElidedNode, elided_node, elided_nodes)
+               {
+                       (void) pgpa_build_scan(walker, plan, elided_node,
+                                                                  beneath_any_gather, within_join_problem);
+               }
+
+               /*
+                * If there were any elided nodes, then everything beneath those nodes
+                * is not part of the same join problem.
+                *
+                * In more detail, if an Append or MergeAppend was elided, then a
+                * partitionwise join was chosen and only a single child survived; if
+                * a SubqueryScan was elided, the subquery was planned without
+                * flattening it into the parent.
+                */
+               within_join_problem = false;
+               join_unroller = NULL;
+       }
+
+       /*
+        * If we're within a join problem, the join unroller is responsible for
+        * building any required scan for this node. If not, we do it here.
+        */
+       if (!within_join_problem)
+               (void) pgpa_build_scan(walker, plan, NULL, beneath_any_gather, false);
+
+       /*
+        * If this join needs to be unrolled but there's no join unroller already
+        * available, create one.
+        */
+       if (join_unroller == NULL && pgpa_is_join(plan))
+       {
+               join_unroller = pgpa_create_join_unroller();
+               join_unroller_toplevel = true;
+               within_join_problem = true;
+       }
+
+       /*
+        * If this join is to be unrolled, pgpa_unroll_join() will return the join
+        * unroller object that should be passed down when we recurse into the
+        * outer and inner sides of the plan.
+        */
+       if (join_unroller != NULL)
+               pgpa_unroll_join(walker, plan, beneath_any_gather, join_unroller,
+                                                &outer_join_unroller, &inner_join_unroller);
+
+       /* Add RTIs from the plan node to all active query features. */
+       pgpa_qf_add_plan_rtis(active_query_features, plan);
+
+       /*
+        * Recurse into the outer and inner subtrees.
+        *
+        * As an exception, if this is a ForeignScan, don't recurse. postgres_fdw
+        * sometimes stores an EPQ recheck plan in plan->lefttree, but that's going
+        * to mention the same set of relations as the ForeignScan itself, and we
+        * have no way to emit advice targeting the EPQ case vs. the non-EPQ case.
+        * Moreover, it's not entirely clear what other FDWs might do with the
+        * left and right subtrees. Maybe some better handling is needed here, but
+        * for now, we just punt.
+        */
+       if (!IsA(plan, ForeignScan))
+       {
+               if (plan->lefttree != NULL)
+                       pgpa_walk_recursively(walker, plan->lefttree, within_join_problem,
+                                                                 outer_join_unroller, active_query_features,
+                                                                 beneath_any_gather);
+               if (plan->righttree != NULL)
+                       pgpa_walk_recursively(walker, plan->righttree, within_join_problem,
+                                                                 inner_join_unroller, active_query_features,
+                                                                 beneath_any_gather);
+       }
+
+       /*
+        * If we created a join unroller up above, then it's also our job to use
+        * it to build the final pgpa_unrolled_join, and to destroy the object.
+        */
+       if (join_unroller_toplevel)
+       {
+               pgpa_unrolled_join *ujoin;
+
+               ujoin = pgpa_build_unrolled_join(walker, join_unroller);
+               walker->toplevel_unrolled_joins =
+                       lappend(walker->toplevel_unrolled_joins, ujoin);
+               pgpa_destroy_join_unroller(join_unroller);
+               (void) pgpa_process_unrolled_join(walker, ujoin);
+       }
+
+       /*
+        * Some plan types can have additional children. Nodes like Append that
+        * can have any number of children store them in a List; a SubqueryScan
+        * just has a field for a single additional Plan.
+        */
+       switch (nodeTag(plan))
+       {
+               case T_Append:
+                       {
+                               Append     *aplan = (Append *) plan;
+
+                               extraplans = aplan->appendplans;
+                               if (bms_is_empty(aplan->apprelids))
+                                       pushdown_query_features = active_query_features;
+                       }
+                       break;
+               case T_MergeAppend:
+                       {
+                               MergeAppend *maplan = (MergeAppend *) plan;
+
+                               extraplans = maplan->mergeplans;
+                               if (bms_is_empty(maplan->apprelids))
+                                       pushdown_query_features = active_query_features;
+                       }
+                       break;
+               case T_BitmapAnd:
+                       extraplans = ((BitmapAnd *) plan)->bitmapplans;
+                       break;
+               case T_BitmapOr:
+                       extraplans = ((BitmapOr *) plan)->bitmapplans;
+                       break;
+               case T_SubqueryScan:
+
+                       /*
+                        * We don't pass down active_query_features across here, because
+                        * those are specific to a subquery level.
+                        */
+                       pgpa_walk_recursively(walker, ((SubqueryScan *) plan)->subplan,
+                                                                 false, NULL, NIL, beneath_any_gather);
+                       break;
+               case T_CustomScan:
+                       extraplans = ((CustomScan *) plan)->custom_plans;
+                       break;
+               default:
+                       break;
+       }
+
+       /*
+        * If we found a list of extra children, iterate over it. Each child
+        * starts out outside of any join problem.
+        */
+       foreach(lc, extraplans)
+       {
+               Plan       *subplan = lfirst(lc);
+
+               pgpa_walk_recursively(walker, subplan, false, NULL,
+                                                         pushdown_query_features,
+                                                         beneath_any_gather);
+       }
+}
+
+/*
+ * Record join strategy information for a newly-built pgpa_unrolled_join.
+ *
+ * For each inner member of the join, the member's relid set is appended to
+ * walker->join_strategies[] under the strategy used to join that member. A
+ * member that is itself an unrolled join is handled by recursing, so callers
+ * need only invoke this for toplevel pgpa_unrolled_join objects.
+ *
+ * The return value is the union of the relid sets gathered for the inner
+ * members; it exists for the benefit of the recursion, and the toplevel
+ * caller is expected to ignore it.
+ *
+ * NOTE(review): for a member that is a sub-join, the relid set used here is
+ * whatever the recursive call returns, which is built only from that
+ * sub-join's inner members. Whether the sub-join's outer side ought to be
+ * included as well should be confirmed.
+ */
+static Bitmapset *
+pgpa_process_unrolled_join(pgpa_plan_walker_context *walker,
+                                                  pgpa_unrolled_join *ujoin)
+{
+       Bitmapset  *result = NULL;
+       int                     i;
+
+       for (i = 0; i < ujoin->ninner; i++)
+       {
+               pgpa_join_member *inner = &ujoin->inner[i];
+               Bitmapset  *inner_relids;
+
+               /* A plain scan supplies its own relids; a sub-join recurses. */
+               if (inner->unrolled_join == NULL)
+               {
+                       Assert(inner->scan != NULL);
+                       inner_relids = inner->scan->relids;
+               }
+               else
+                       inner_relids = pgpa_process_unrolled_join(walker,
+                                                                                                         inner->unrolled_join);
+
+               /* File the relid set under the strategy used for this member. */
+               walker->join_strategies[ujoin->strategy[i]] =
+                       lappend(walker->join_strategies[ujoin->strategy[i]],
+                                       inner_relids);
+
+               result = bms_add_members(result, inner_relids);
+       }
+
+       return result;
+}
+
+/*
+ * Register a plan node that the tree walk should treat as a query feature
+ * once it gets there.
+ *
+ * The feature is created right away and queued on
+ * walker->future_query_features. This is only appropriate for plan nodes
+ * that the tree walk can't have reached yet!
+ */
+void
+pgpa_add_future_feature(pgpa_plan_walker_context *walker,
+                                               pgpa_qf_type type, Plan *plan)
+{
+       pgpa_query_feature *feature;
+
+       feature = pgpa_add_feature(walker, type, plan);
+       walker->future_query_features = lappend(walker->future_query_features,
+                                                                                       feature);
+}
+
+/*
+ * Find the last elided node recorded for this plan node's ID, if any.
+ *
+ * Had it not been removed during setrefs processing, the last elided node
+ * is the one that would have appeared uppermost in the plan tree. Returns
+ * NULL when no elided node is associated with the plan node.
+ */
+ElidedNode *
+pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan)
+{
+       ElidedNode *last_match = NULL;
+
+       /* Later matches overwrite earlier ones, so the final one wins. */
+       foreach_node(ElidedNode, candidate, pstmt->elidedNodes)
+       {
+               if (candidate->plan_node_id != plan->plan_node_id)
+                       continue;
+               last_match = candidate;
+       }
+
+       return last_match;
+}
+
+/*
+ * Return the set of RTIs stored in a plan node, for those node types that
+ * carry such a set (Result, ForeignScan, Append, MergeAppend).
+ *
+ * Returns NULL for every other node type.
+ */
+Bitmapset *
+pgpa_relids(Plan *plan)
+{
+       switch (nodeTag(plan))
+       {
+               case T_Result:
+                       return ((Result *) plan)->relids;
+               case T_ForeignScan:
+                       return ((ForeignScan *) plan)->fs_relids;
+               case T_Append:
+                       return ((Append *) plan)->apprelids;
+               case T_MergeAppend:
+                       return ((MergeAppend *) plan)->apprelids;
+               default:
+                       return NULL;
+       }
+}
+
+/*
+ * Return the RTI scanned by a plan node.
+ *
+ * For the scan node types listed below, this is the node's scanrelid; for
+ * any other node type, the result is 0.
+ */
+Index
+pgpa_scanrelid(Plan *plan)
+{
+       Index           rti = 0;
+
+       switch (nodeTag(plan))
+       {
+               case T_SeqScan:
+               case T_SampleScan:
+               case T_IndexScan:
+               case T_IndexOnlyScan:
+               case T_BitmapHeapScan:
+               case T_TidScan:
+               case T_TidRangeScan:
+               case T_SubqueryScan:
+               case T_FunctionScan:
+               case T_TableFuncScan:
+               case T_ValuesScan:
+               case T_CteScan:
+               case T_NamedTuplestoreScan:
+               case T_WorkTableScan:
+               case T_ForeignScan:
+               case T_CustomScan:
+                       /* All of these are accessed through the Scan struct. */
+                       rti = ((Scan *) plan)->scanrelid;
+                       break;
+               default:
+                       break;
+       }
+
+       return rti;
+}
+
+/*
+ * Allocate a pgpa_query_feature of the given type for the given plan node,
+ * and append it to the walker's list of query features of that type.
+ *
+ * Returns the new feature.
+ */
+static pgpa_query_feature *
+pgpa_add_feature(pgpa_plan_walker_context *walker,
+                                pgpa_qf_type type, Plan *plan)
+{
+       pgpa_query_feature *feature;
+
+       /* Zero-filled allocation; only the type and plan are set here. */
+       feature = palloc0_object(pgpa_query_feature);
+       feature->plan = plan;
+       feature->type = type;
+
+       walker->query_features[feature->type] =
+               lappend(walker->query_features[feature->type], feature);
+
+       return feature;
+}
+
+/*
+ * Add one RTI to the relid set of every active query feature.
+ */
+static void
+pgpa_qf_add_rti(List *active_query_features, Index rti)
+{
+       ListCell   *cell;
+
+       foreach(cell, active_query_features)
+       {
+               pgpa_query_feature *feature = (pgpa_query_feature *) lfirst(cell);
+
+               feature->relids = bms_add_member(feature->relids, rti);
+       }
+}
+
+/*
+ * Add a whole set of RTIs to the relid set of every active query feature.
+ */
+static void
+pgpa_qf_add_rtis(List *active_query_features, Bitmapset *relids)
+{
+       ListCell   *cell;
+
+       foreach(cell, active_query_features)
+       {
+               pgpa_query_feature *feature = (pgpa_query_feature *) lfirst(cell);
+
+               feature->relids = bms_add_members(feature->relids, relids);
+       }
+}
+
+/*
+ * Add the RTIs that a plan node directly contains to every active query
+ * feature.
+ *
+ * If pgpa_relids() yields a set for the node, that set is used; otherwise,
+ * the node's scanrelid is used if it is nonzero. If neither applies,
+ * nothing is added.
+ */
+static void
+pgpa_qf_add_plan_rtis(List *active_query_features, Plan *plan)
+{
+       Bitmapset  *plan_relids = pgpa_relids(plan);
+       Index           scan_rti;
+
+       /* A set of RTIs, when the node has one, takes precedence. */
+       if (plan_relids != NULL)
+       {
+               pgpa_qf_add_rtis(active_query_features, plan_relids);
+               return;
+       }
+
+       /* Otherwise fall back to the node's scanrelid, if it has one. */
+       scan_rti = pgpa_scanrelid(plan);
+       if (scan_rti != 0)
+               pgpa_qf_add_rti(active_query_features, scan_rti);
+}
+
+/*
+ * If we generated plan advice using the provided walker object and array
+ * of identifiers, would we generate the specified tag/target combination?
+ *
+ * If yes, the plan conforms to the advice; if no, it does not. Note that
+ * we have no way of knowing whether the planner was forced to emit a plan
+ * that conformed to the advice or just happened to do so.
+ */
+bool
+pgpa_walker_would_advise(pgpa_plan_walker_context *walker,
+                                                pgpa_identifier *rt_identifiers,
+                                                pgpa_advice_tag_type tag,
+                                                pgpa_advice_target *target)
+{
+       Index           rtable_length = list_length(walker->pstmt->rtable);
+       Bitmapset  *relids = NULL;
+
+       if (tag == PGPA_TAG_JOIN_ORDER)
+       {
+               foreach_ptr(pgpa_unrolled_join, ujoin, walker->toplevel_unrolled_joins)
+               {
+                       if (pgpa_walker_join_order_matches(ujoin, rtable_length,
+                                                                                          rt_identifiers, target, true))
+                               return true;
+               }
+
+               return false;
+       }
+
+       if (target->ttype == PGPA_TARGET_IDENTIFIER)
+       {
+               Index           rti;
+
+               rti = pgpa_walker_get_rti(rtable_length, rt_identifiers, &target->rid);
+               relids = bms_make_singleton(rti);
+       }
+       else
+       {
+               Assert(target->ttype == PGPA_TARGET_ORDERED_LIST);
+               foreach_ptr(pgpa_advice_target, child_target, target->children)
+               {
+                       Index           rti;
+
+                       Assert(child_target->ttype == PGPA_TARGET_IDENTIFIER);
+                       rti = pgpa_compute_rti_from_identifier(rtable_length,
+                                                                                                  rt_identifiers,
+                                                                                                  &child_target->rid);
+                       if (rti == 0)
+                               elog(ERROR, "cannot determine RTI for advice target");
+                       relids = bms_add_member(relids, rti);
+               }
+       }
+
+       switch (tag)
+       {
+               case PGPA_TAG_JOIN_ORDER:
+                       /* should have been handled above */
+                       pg_unreachable();
+                       break;
+               case PGPA_TAG_BITMAP_HEAP_SCAN:
+                       return pgpa_walker_contains_scan(walker,
+                                                                                        PGPA_SCAN_BITMAP_HEAP,
+                                                                                        relids);
+               case PGPA_TAG_FOREIGN_JOIN:
+                       return pgpa_walker_contains_scan(walker,
+                                                                                        PGPA_SCAN_FOREIGN,
+                                                                                        relids);
+               case PGPA_TAG_INDEX_ONLY_SCAN:
+                       return pgpa_walker_contains_scan(walker,
+                                                                                        PGPA_SCAN_INDEX_ONLY,
+                                                                                        relids);
+               case PGPA_TAG_INDEX_SCAN:
+                       return pgpa_walker_contains_scan(walker,
+                                                                                        PGPA_SCAN_INDEX,
+                                                                                        relids);
+               case PGPA_TAG_PARTITIONWISE:
+                       return pgpa_walker_contains_scan(walker,
+                                                                                        PGPA_SCAN_PARTITIONWISE,
+                                                                                        relids);
+               case PGPA_TAG_SEQ_SCAN:
+                       return pgpa_walker_contains_scan(walker,
+                                                                                        PGPA_SCAN_SEQ,
+                                                                                        relids);
+               case PGPA_TAG_TID_SCAN:
+                       return pgpa_walker_contains_scan(walker,
+                                                                                        PGPA_SCAN_TID,
+                                                                                        relids);
+               case PGPA_TAG_GATHER:
+                       return pgpa_walker_contains_feature(walker,
+                                                                                               PGPAQF_GATHER,
+                                                                                               relids);
+               case PGPA_TAG_GATHER_MERGE:
+                       return pgpa_walker_contains_feature(walker,
+                                                                                               PGPAQF_GATHER_MERGE,
+                                                                                               relids);
+               case PGPA_TAG_SEMIJOIN_NON_UNIQUE:
+                       return pgpa_walker_contains_feature(walker,
+                                                                                               PGPAQF_SEMIJOIN_NON_UNIQUE,
+                                                                                               relids);
+               case PGPA_TAG_SEMIJOIN_UNIQUE:
+                       return pgpa_walker_contains_feature(walker,
+                                                                                               PGPAQF_SEMIJOIN_UNIQUE,
+                                                                                               relids);
+               case PGPA_TAG_HASH_JOIN:
+                       return pgpa_walker_contains_join(walker,
+                                                                                        JSTRAT_HASH_JOIN,
+                                                                                        relids);
+               case PGPA_TAG_MERGE_JOIN_MATERIALIZE:
+                       return pgpa_walker_contains_join(walker,
+                                                                                        JSTRAT_MERGE_JOIN_MATERIALIZE,
+                                                                                        relids);
+               case PGPA_TAG_MERGE_JOIN_PLAIN:
+                       return pgpa_walker_contains_join(walker,
+                                                                                        JSTRAT_MERGE_JOIN_PLAIN,
+                                                                                        relids);
+               case PGPA_TAG_NESTED_LOOP_MATERIALIZE:
+                       return pgpa_walker_contains_join(walker,
+                                                                                        JSTRAT_NESTED_LOOP_MATERIALIZE,
+                                                                                        relids);
+               case PGPA_TAG_NESTED_LOOP_MEMOIZE:
+                       return pgpa_walker_contains_join(walker,
+                                                                                        JSTRAT_NESTED_LOOP_MEMOIZE,
+                                                                                        relids);
+               case PGPA_TAG_NESTED_LOOP_PLAIN:
+                       return pgpa_walker_contains_join(walker,
+                                                                                        JSTRAT_NESTED_LOOP_PLAIN,
+                                                                                        relids);
+               case PGPA_TAG_NO_GATHER:
+                       return pgpa_walker_contains_no_gather(walker, relids);
+       }
+
+       /* should not get here */
+       return false;
+}
+
+/*
+ * Check whether an unrolled join is consistent with the join order given by
+ * an ordered-list advice target.
+ *
+ * The first child of the target is compared with the outermost member of the
+ * join, and each later child with the inner member at the corresponding
+ * position. At toplevel, the target may name fewer members than the join has
+ * (a prefix match); below toplevel, the member counts must agree exactly.
+ */
+static bool
+pgpa_walker_join_order_matches(pgpa_unrolled_join *ujoin,
+                                                          Index rtable_length,
+                                                          pgpa_identifier *rt_identifiers,
+                                                          pgpa_advice_target *target,
+                                                          bool toplevel)
+{
+       int             nchildren = list_length(target->children);
+       bool            count_ok;
+
+       Assert(target->ttype == PGPA_TARGET_ORDERED_LIST);
+
+       /* Extra children never match; missing ones are OK only at toplevel. */
+       if (toplevel)
+               count_ok = (nchildren <= ujoin->ninner + 1);
+       else
+               count_ok = (nchildren == ujoin->ninner + 1);
+       if (!count_ok)
+               return false;
+
+       /* Child 0 describes the outermost rel. */
+       if (!pgpa_walker_join_order_matches_member(&ujoin->outer,
+                                                                                          rtable_length,
+                                                                                          rt_identifiers,
+                                                                                          linitial(target->children)))
+               return false;
+
+       /* Child k, for k >= 1, describes inner rel k - 1. */
+       for (int k = 1; k < nchildren; k++)
+       {
+               if (!pgpa_walker_join_order_matches_member(&ujoin->inner[k - 1],
+                                                                                                  rtable_length,
+                                                                                                  rt_identifiers,
+                                                                                                  list_nth(target->children, k)))
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Check whether a single member of an unrolled join agrees with an advice
+ * target.
+ *
+ * A member that is itself an unrolled join can only be matched by an ordered
+ * list, which is compared recursively and must match in full rather than as
+ * a prefix. A member that is a scan is matched by an identifier, or by an
+ * unordered list of identifiers, whose RTIs taken together are exactly the
+ * scan's relid set.
+ */
+static bool
+pgpa_walker_join_order_matches_member(pgpa_join_member *member,
+                                                                         Index rtable_length,
+                                                                         pgpa_identifier *rt_identifiers,
+                                                                         pgpa_advice_target *target)
+{
+       Bitmapset  *target_relids = NULL;
+
+       /* A nested join requires a nested ordered list, matched exactly. */
+       if (member->unrolled_join != NULL)
+               return target->ttype == PGPA_TARGET_ORDERED_LIST &&
+                       pgpa_walker_join_order_matches(member->unrolled_join,
+                                                                                  rtable_length,
+                                                                                  rt_identifiers,
+                                                                                  target,
+                                                                                  false);
+
+       Assert(member->scan != NULL);
+
+       /* An ordered list could only match an unrolled join, never a scan. */
+       if (target->ttype == PGPA_TARGET_ORDERED_LIST)
+               return false;
+
+       /* Collect the RTIs named by the target. */
+       if (target->ttype == PGPA_TARGET_IDENTIFIER)
+       {
+               Index           only_rti;
+
+               only_rti = pgpa_walker_get_rti(rtable_length, rt_identifiers,
+                                                                          &target->rid);
+               target_relids = bms_make_singleton(only_rti);
+       }
+       else if (target->ttype == PGPA_TARGET_UNORDERED_LIST)
+       {
+               foreach_ptr(pgpa_advice_target, child_target, target->children)
+               {
+                       Index           child_rti;
+
+                       child_rti = pgpa_walker_get_rti(rtable_length, rt_identifiers,
+                                                                                       &child_target->rid);
+                       target_relids = bms_add_member(target_relids, child_rti);
+               }
+       }
+
+       /* The target matches only if it names exactly the scan's relids. */
+       return bms_equal(member->scan->relids, target_relids);
+}
+
+/*
+ * Report whether the walker recorded a scan of the given strategy whose
+ * relid set is exactly the given one.
+ */
+static bool
+pgpa_walker_contains_scan(pgpa_plan_walker_context *walker,
+                                                 pgpa_scan_strategy strategy,
+                                                 Bitmapset *relids)
+{
+       foreach_ptr(pgpa_scan, candidate, walker->scans[strategy])
+       {
+               /*
+                * XXX. If this is index-related advice, we should also validate that
+                * the advice target's index target matches the Plan tree.
+                */
+               if (!bms_equal(candidate->relids, relids))
+                       continue;
+
+               return true;
+       }
+
+       /* No scan of this strategy covers exactly these relids. */
+       return false;
+}
+
+/*
+ * Report whether the walker recorded a query feature of the given type whose
+ * relid set is exactly the given one.
+ */
+static bool
+pgpa_walker_contains_feature(pgpa_plan_walker_context *walker,
+                                                        pgpa_qf_type type,
+                                                        Bitmapset *relids)
+{
+       List       *features = walker->query_features[type];
+       int             nfeatures = list_length(features);
+
+       /* Look at each recorded feature of this type in list order. */
+       for (int i = 0; i < nfeatures; i++)
+       {
+               pgpa_query_feature *feature = list_nth(features, i);
+
+               if (bms_equal(feature->relids, relids))
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * Report whether the walker recorded a join of the given strategy for
+ * exactly the given relid set.
+ *
+ * Each entry in the per-strategy list is itself a relid set, so the entries
+ * are compared against relids directly.
+ */
+static bool
+pgpa_walker_contains_join(pgpa_plan_walker_context *walker,
+                                                 pgpa_join_strategy strategy,
+                                                 Bitmapset *relids)
+{
+       ListCell   *lc;
+
+       foreach(lc, walker->join_strategies[strategy])
+       {
+               Bitmapset  *recorded = (Bitmapset *) lfirst(lc);
+
+               if (bms_equal(recorded, relids))
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * Report whether the walker would mark all of the given relids as NO_GATHER,
+ * that is, whether every one of them is a member of the walker's
+ * no_gather_scans set.
+ */
+static bool
+pgpa_walker_contains_no_gather(pgpa_plan_walker_context *walker,
+                                                          Bitmapset *relids)
+{
+       Bitmapset  *ungathered = walker->no_gather_scans;
+
+       return bms_is_subset(relids, ungathered);
+}
+
+/*
+ * Translate a relation identifier into an RTI, raising an error if that is
+ * not possible.
+ *
+ * This is meant to be used on system-generated advice, so an identifier that
+ * cannot be resolved (signalled by a result of 0 from the lookup) indicates
+ * an advice generation bug; hence we throw an error rather than passing the
+ * failure back to the caller.
+ */
+static Index
+pgpa_walker_get_rti(Index rtable_length,
+                                       pgpa_identifier *rt_identifiers,
+                                       pgpa_identifier *rid)
+{
+       Index           result = pgpa_compute_rti_from_identifier(rtable_length,
+                                                                                                                 rt_identifiers,
+                                                                                                                 rid);
+
+       if (result == 0)
+               elog(ERROR, "cannot determine RTI for advice target");
+
+       return result;
+}
diff --git a/contrib/pg_plan_advice/pgpa_walker.h b/contrib/pg_plan_advice/pgpa_walker.h
new file mode 100644 (file)
index 0000000..d6584c0
--- /dev/null
@@ -0,0 +1,121 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_walker.h
+ *       Plan tree iteration
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ *       contrib/pg_plan_advice/pgpa_walker.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_WALKER_H
+#define PGPA_WALKER_H
+
+#include "pgpa_ast.h"
+#include "pgpa_join.h"
+#include "pgpa_scan.h"
+
+/*
+ * We use the term "query feature" to refer to plan nodes that are interesting
+ * in the following way: to generate advice, we'll need to know the set of
+ * same-subquery, non-join RTIs occurring at or below that plan node, without
+ * admixture of parent and child RTIs.
+ *
+ * For example, Gather nodes, designated by PGPAQF_GATHER, and Gather Merge
+ * nodes, designated by PGPAQF_GATHER_MERGE, are query features, because we'll
+ * want to admit some kind of advice that describes the portion of the plan
+ * tree that appears beneath those nodes.
+ *
+ * Each semijoin can be implemented either by directly performing a semijoin,
+ * or by making one side unique and then performing a normal join. Either way,
+ * we use a query feature to notice what decision was made, so that we can
+ * describe it by enumerating the RTIs on that side of the join.
+ *
+ * To elaborate on the "no admixture of parent and child RTIs" rule, in all of
+ * these cases, if the entirety of an inheritance hierarchy appears beneath
+ * the query feature, we only want to name the parent table. But it's also
+ * possible to have cases where we must name child tables. This is particularly
+ * likely to happen when partitionwise join is in use, but could happen for
+ * Gather or Gather Merge even without that, if one of those appears below
+ * an Append or MergeAppend node for a single table.
+ */
+typedef enum pgpa_qf_type
+{
+       PGPAQF_GATHER,
+       PGPAQF_GATHER_MERGE,
+       PGPAQF_SEMIJOIN_NON_UNIQUE,
+       PGPAQF_SEMIJOIN_UNIQUE
+       /* update NUM_PGPA_QF_TYPES if you add anything here */
+} pgpa_qf_type;
+
+#define NUM_PGPA_QF_TYPES ((int) PGPAQF_SEMIJOIN_UNIQUE + 1)
+
+/*
+ * For each query feature, we keep track of the feature type, the relevant
+ * plan node, and the set of relids that we found underneath that plan node.
+ * See the comments on pgpa_qf_type, above, for additional details.
+ */
+typedef struct pgpa_query_feature
+{
+       pgpa_qf_type type;
+       Plan       *plan;
+       Bitmapset  *relids;
+} pgpa_query_feature;
+
+/*
+ * Context object for plan tree walk.
+ *
+ * pstmt is the PlannedStmt we're studying.
+ *
+ * scans is an array of lists of pgpa_scan objects. The array is indexed by
+ * the scan's pgpa_scan_strategy.
+ *
+ * no_gather_scans is the set of scan RTIs that do not appear beneath any
+ * Gather or Gather Merge node.
+ *
+ * toplevel_unrolled_joins is a list of all pgpa_unrolled_join objects that
+ * are not a child of some other pgpa_unrolled_join.
+ *
+ * join_strategies is an array of lists of Bitmapset objects. Each Bitmapset
+ * is the set of relids that appears on the inner side of some join (excluding
+ * RTIs from partition children and subqueries). The array is indexed by
+ * pgpa_join_strategy.
+ *
+ * query_features is an array of lists of pgpa_query_feature objects, indexed
+ * by pgpa_qf_type.
+ *
+ * future_query_features is only used during the plan tree walk and should
+ * be empty when the tree walk concludes. It is a list of pgpa_query_feature
+ * objects for Plan nodes that the plan tree walk has not yet encountered;
+ * when encountered, they will be moved to the list of active query features
+ * that is propagated via the call stack.
+ */
+typedef struct pgpa_plan_walker_context
+{
+       PlannedStmt *pstmt;
+       List       *scans[NUM_PGPA_SCAN_STRATEGY];
+       Bitmapset  *no_gather_scans;
+       List       *toplevel_unrolled_joins;
+       List       *join_strategies[NUM_PGPA_JOIN_STRATEGY];
+       List       *query_features[NUM_PGPA_QF_TYPES];
+       List       *future_query_features;
+} pgpa_plan_walker_context;
+
+extern void pgpa_plan_walker(pgpa_plan_walker_context *walker,
+                                                        PlannedStmt *pstmt);
+
+extern void pgpa_add_future_feature(pgpa_plan_walker_context *walker,
+                                                                       pgpa_qf_type type,
+                                                                       Plan *plan);
+
+extern ElidedNode *pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan);
+extern Bitmapset *pgpa_relids(Plan *plan);
+extern Index pgpa_scanrelid(Plan *plan);
+
+extern bool pgpa_walker_would_advise(pgpa_plan_walker_context *walker,
+                                                                        pgpa_identifier *rt_identifiers,
+                                                                        pgpa_advice_tag_type tag,
+                                                                        pgpa_advice_target *target);
+
+#endif
diff --git a/contrib/pg_plan_advice/sql/gather.sql b/contrib/pg_plan_advice/sql/gather.sql
new file mode 100644 (file)
index 0000000..5828004
--- /dev/null
@@ -0,0 +1,76 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 1;
+SET parallel_setup_cost = 0;
+SET parallel_tuple_cost = 0;
+SET min_parallel_table_scan_size = 0;
+SET debug_parallel_query = off;
+
+CREATE TABLE gt_dim (id serial primary key, dim text)
+       WITH (autovacuum_enabled = false);
+INSERT INTO gt_dim (dim) SELECT random()::text FROM generate_series(1,100) g;
+VACUUM ANALYZE gt_dim;
+
+CREATE TABLE gt_fact (
+       id int not null,
+       dim_id integer not null references gt_dim (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO gt_fact
+       SELECT g, (g%3)+1 FROM generate_series(1,100000) g;
+VACUUM ANALYZE gt_fact;
+
+-- By default, we expect Gather Merge with a parallel hash join.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+
+-- Force Gather or Gather Merge of both relations together.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+COMMIT;
+
+-- Force a separate Gather or Gather Merge operation for each relation.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+COMMIT;
+
+-- Force a Gather or Gather Merge on one relation but no parallelism on other.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(f) no_gather(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(d) no_gather(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather(f) no_gather(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather(d) no_gather(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+COMMIT;
+
+-- Force no Gather or Gather Merge use at all.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'no_gather(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+COMMIT;
+
+-- Can't force Gather Merge without the ORDER BY clause, but just Gather is OK.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'gather((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id;
+COMMIT;
diff --git a/contrib/pg_plan_advice/sql/join_order.sql b/contrib/pg_plan_advice/sql/join_order.sql
new file mode 100644 (file)
index 0000000..5aa2fc6
--- /dev/null
@@ -0,0 +1,96 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+
+CREATE TABLE jo_dim1 (id integer primary key, dim1 text, val1 int)
+       WITH (autovacuum_enabled = false);
+INSERT INTO jo_dim1 (id, dim1, val1)
+       SELECT g, 'some filler text ' || g, (g % 3) + 1
+         FROM generate_series(1,100) g;
+VACUUM ANALYZE jo_dim1;
+CREATE TABLE jo_dim2 (id integer primary key, dim2 text, val2 int)
+       WITH (autovacuum_enabled = false);
+INSERT INTO jo_dim2 (id, dim2, val2)
+       SELECT g, 'some filler text ' || g, (g % 7) + 1
+         FROM generate_series(1,1000) g;
+VACUUM ANALYZE jo_dim2;
+
+CREATE TABLE jo_fact (
+       id int primary key,
+       dim1_id integer not null references jo_dim1 (id),
+       dim2_id integer not null references jo_dim2 (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO jo_fact
+       SELECT g, (g%100)+1, (g%100)+1 FROM generate_series(1,100000) g;
+VACUUM ANALYZE jo_fact;
+
+-- We expect to join to d2 first and then d1, since the condition on d2
+-- is more selective.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+
+-- Force a few different join orders. Some of these are very inefficient,
+-- but the planner considers them all viable.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+SET LOCAL pg_plan_advice.advice = 'join_order(d1 f d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+SET LOCAL pg_plan_advice.advice = 'join_order(f (d1 d2))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+       LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+       LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+       WHERE val1 = 1 AND val2 = 1;
+COMMIT;
+
+-- The unusual formulation of this query is intended to prevent the query
+-- planner from reducing the FULL JOIN to some other join type, so that we
+-- can test what happens with a join type that cannot be reordered.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+
+-- We should not be able to force the planner to join f to d1 first, because
+-- that is not a valid join order, but we should be able to force the planner
+-- to make either d2 or f the driving table.
+BEGIN;
+-- XXX: The advice feedback says 'partially matched' here which isn't exactly
+-- wrong given the way that flag is handled in the code, but it's at the very
+-- least confusing. Something should probably be improved here.
+SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+SET LOCAL pg_plan_advice.advice = 'join_order(d2 f d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+       INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+       ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+COMMIT;
+
+-- XXX: add tests for join order prefix matching
+-- XXX: join_order(justonerel) shouldn't report partially matched
diff --git a/contrib/pg_plan_advice/sql/join_strategy.sql b/contrib/pg_plan_advice/sql/join_strategy.sql
new file mode 100644 (file)
index 0000000..8eb823f
--- /dev/null
@@ -0,0 +1,76 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+
+CREATE TABLE join_dim (id serial primary key, dim text)
+       WITH (autovacuum_enabled = false);
+INSERT INTO join_dim (dim) SELECT random()::text FROM generate_series(1,100) g;
+VACUUM ANALYZE join_dim;
+
+CREATE TABLE join_fact (
+       id int primary key,
+       dim_id integer not null references join_dim (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO join_fact
+       SELECT g, (g%3)+1 FROM generate_series(1,100000) g;
+CREATE INDEX join_fact_dim_id ON join_fact (dim_id);
+VACUUM ANALYZE join_fact;
+
+-- We expect a hash join by default.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+
+-- Try forcing each join method in turn with join_dim as the inner table.
+-- All of these should work except for MERGE_JOIN_MATERIALIZE; that will
+-- fail, because the planner knows that join_dim (id) is unique, and will
+-- refuse to add mark/restore overhead.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+COMMIT;
+
+-- Now try forcing each join method in turn with join_fact as the inner
+-- table. All of these should work.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+COMMIT;
+
+-- We can't force a foreign join between these tables, because they
+-- aren't foreign tables.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'FOREIGN_JOIN((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+       SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+COMMIT;
diff --git a/contrib/pg_plan_advice/sql/local_collector.sql b/contrib/pg_plan_advice/sql/local_collector.sql
new file mode 100644 (file)
index 0000000..be14539
--- /dev/null
@@ -0,0 +1,40 @@
+CREATE EXTENSION pg_plan_advice;
+
+-- Try clearing advice before we've collected any.
+SELECT pg_clear_collected_local_advice();
+
+-- Set a small advice collection limit so that we'll exceed it.
+SET pg_plan_advice.local_collection_limit = 2;
+
+-- Set up a dummy table.
+CREATE TABLE dummy_table (a int primary key, b text)
+       WITH (autovacuum_enabled = false, parallel_workers = 0);
+
+-- Test queries.
+SELECT * FROM dummy_table a, dummy_table b;
+SELECT * FROM dummy_table;
+
+-- Should return the advice from the second test query.
+SELECT advice FROM pg_get_collected_local_advice() ORDER BY id LIMIT 1;
+
+-- Now try clearing advice again.
+SELECT pg_clear_collected_local_advice();
+
+-- Raise the collection limit so that the collector uses multiple chunks.
+SET pg_plan_advice.local_collection_limit = 2000;
+
+-- Push a bunch of queries through the collector.
+DO $$
+BEGIN
+       FOR x IN 1..2000 LOOP
+               EXECUTE 'SELECT * FROM dummy_table';
+       END LOOP;
+END
+$$;
+
+-- Check that the collector worked.
+SELECT COUNT(*) FROM pg_get_collected_local_advice();
+
+-- And clear one more time, to verify that this doesn't cause a problem
+-- even with a larger number of entries.
+SELECT pg_clear_collected_local_advice();
diff --git a/contrib/pg_plan_advice/sql/partitionwise.sql b/contrib/pg_plan_advice/sql/partitionwise.sql
new file mode 100644 (file)
index 0000000..e42c061
--- /dev/null
@@ -0,0 +1,78 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+SET enable_partitionwise_join = true;
+
+CREATE TABLE pt1 (id integer primary key, dim1 text, val1 int)
+       PARTITION BY RANGE (id);
+CREATE TABLE pt1a PARTITION OF pt1 FOR VALUES FROM (1) to (1001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt1b PARTITION OF pt1 FOR VALUES FROM (1001) to (2001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt1c PARTITION OF pt1 FOR VALUES FROM (2001) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO pt1 (id, dim1, val1)
+       SELECT g, 'some filler text ' || g, (g % 3) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE pt1;
+
+CREATE TABLE pt2 (id integer primary key, dim2 text, val2 int)
+       PARTITION BY RANGE (id);
+CREATE TABLE pt2a PARTITION OF pt2 FOR VALUES FROM (1) to (1001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt2b PARTITION OF pt2 FOR VALUES FROM (1001) to (2001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt2c PARTITION OF pt2 FOR VALUES FROM (2001) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO pt2 (id, dim2, val2)
+       SELECT g, 'some other text ' || g, (g % 5) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE pt2;
+
+CREATE TABLE pt3 (id integer primary key, dim3 text, val3 int)
+       PARTITION BY RANGE (id);
+CREATE TABLE pt3a PARTITION OF pt3 FOR VALUES FROM (1) to (1001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt3b PARTITION OF pt3 FOR VALUES FROM (1001) to (2001)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE pt3c PARTITION OF pt3 FOR VALUES FROM (2001) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO pt3 (id, dim3, val3)
+       SELECT g, 'a third random text ' || g, (g % 7) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE pt3;
+
+CREATE TABLE ptmismatch (id integer primary key, dimm text, valm int)
+       PARTITION BY RANGE (id);
+CREATE TABLE ptmismatcha PARTITION OF ptmismatch
+    FOR VALUES FROM (1) to (1501)
+       WITH (autovacuum_enabled = false);
+CREATE TABLE ptmismatchb PARTITION OF ptmismatch
+    FOR VALUES FROM (1501) to (3001)
+       WITH (autovacuum_enabled = false);
+INSERT INTO ptmismatch (id, dimm, valm)
+       SELECT g, 'yet another text ' || g, (g % 2) + 1
+         FROM generate_series(1,3000) g;
+VACUUM ANALYZE ptmismatch;
+
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+   AND val1 = 1 AND val2 = 1 AND val3 = 1;
+
+-- Suppress partitionwise join, or do it just partially.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE(pt1 pt2 pt3)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+   AND val1 = 1 AND val2 = 1 AND val3 = 1;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 pt2) pt3)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+   AND val1 = 1 AND val2 = 1 AND val3 = 1;
+COMMIT;
+
+-- Can't force a partitionwise join with a mismatched table.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 ptmismatch))';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, ptmismatch WHERE pt1.id = ptmismatch.id;
+COMMIT;
diff --git a/contrib/pg_plan_advice/sql/scan.sql b/contrib/pg_plan_advice/sql/scan.sql
new file mode 100644 (file)
index 0000000..25416a7
--- /dev/null
@@ -0,0 +1,195 @@
+-- Tests for scan-method advice: SEQ_SCAN, INDEX_SCAN, INDEX_ONLY_SCAN,
+-- BITMAP_HEAP_SCAN and TID_SCAN.
+LOAD 'pg_plan_advice';
+-- Planner settings for this file: no parallel workers, equal sequential and
+-- random page costs, and zero per-tuple CPU costs.
+-- NOTE(review): presumably chosen to keep the plans below stable -- confirm.
+SET max_parallel_workers_per_gather = 0;
+SET seq_page_cost = 0.1;
+SET random_page_cost = 0.1;
+SET cpu_tuple_cost = 0;
+SET cpu_index_tuple_cost = 0;
+
+-- scan_table has a primary key on a (its index, scan_table_pkey, is named in
+-- advice below) and a BRIN index on b (scan_table_b). Autovacuum is disabled
+-- for the table, and it is vacuumed and analyzed once after loading.
+CREATE TABLE scan_table (a int primary key, b text)
+       WITH (autovacuum_enabled = false);
+INSERT INTO scan_table
+       SELECT g, 'some text ' || g FROM generate_series(1, 100000) g;
+CREATE INDEX scan_table_b ON scan_table USING brin (b);
+VACUUM ANALYZE scan_table;
+
+-- Sequential scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+
+-- Index scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+
+-- Index-only scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+
+-- Bitmap heap scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE b > 'some text 8';
+
+-- TID scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+
+-- TID range scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+
+-- Try forcing each of our test queries to use the scan type they
+-- wanted to use anyway. This should succeed.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE b > 'some text 8';
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+COMMIT;
+
+-- Try to force a full scan of the table to use some other scan type. All
+-- of these will fail. An index scan or bitmap heap scan could potentially
+-- generate the correct answer, but the planner does not even consider these
+-- possibilities due to the lack of a WHERE clause.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+COMMIT;
+
+-- Try again to force index use. This should now succeed for the INDEX_SCAN
+-- and BITMAP_HEAP_SCAN, but the INDEX_ONLY_SCAN can't be forced because the
+-- query fetches columns not included in the index.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+COMMIT;
+
+-- We can force a primary key lookup to use a sequential scan, but we
+-- can't force it to use an index-only scan (due to the column list)
+-- or a TID scan (due to the absence of a TID qual).
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+COMMIT;
+
+-- We can forcibly downgrade an index-only scan to an index scan, but we can't
+-- force the use of an index that the planner thinks is inapplicable.
+--
+-- These use the "SELECT a" form of the query, since that is the one that
+-- gets an index-only scan when no advice is supplied; with "SELECT *" the
+-- first two cases would merely repeat index-scan tests found elsewhere in
+-- this file.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+COMMIT;
+
+-- We can force the use of a sequential scan in place of a bitmap heap scan,
+-- but a plain index scan on a BRIN index is not possible.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE b > 'some text 8';
+-- Use the same query here, so that the qual is on the column covered by the
+-- BRIN index that the advice names.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE b > 'some text 8';
+COMMIT;
+
+-- We can force the use of a sequential scan rather than a TID scan or
+-- TID range scan.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+       WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+COMMIT;
+
+-- Test more complex scenarios with index scans.
+BEGIN;
+-- Should still work if we mention the schema.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+-- But not if we mention the wrong schema.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table cilbup.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+-- It's OK to repeat the same advice.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+-- But it doesn't work if the index target is even notionally different.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+COMMIT;
+
+-- Test assorted incorrect advice.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(nothing)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(nothing whatsoever)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table bogus)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(nothing whatsoever)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table bogus)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+COMMIT;
+
+-- Test our ability to refer to multiple instances of the same alias.
+BEGIN;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s#2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s) SEQ_SCAN(s#2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+    LEFT JOIN scan_table s ON g = s.a;
+COMMIT;
+
+-- Test our ability to refer to scans within a subquery.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+BEGIN;
+-- Should not match.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+-- Should match first query only.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@x)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+-- Should match second query only.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@unnamed_subquery)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+COMMIT;
diff --git a/contrib/pg_plan_advice/sql/syntax.sql b/contrib/pg_plan_advice/sql/syntax.sql
new file mode 100644 (file)
index 0000000..8bc1b71
--- /dev/null
@@ -0,0 +1,42 @@
+LOAD 'pg_plan_advice';
+
+-- An empty string is allowed, and so is an empty target list.
+SET pg_plan_advice.advice = '';
+SET pg_plan_advice.advice = 'SEQ_SCAN()';
+
+-- Test assorted variations in capitalization, whitespace, and which parts of
+-- the relation identifier are included. These should all work.
+SET pg_plan_advice.advice = 'SEQ_SCAN(x)';
+SET pg_plan_advice.advice = 'seq_scan(x@y)';
+SET pg_plan_advice.advice = 'SEQ_scan(x#2)';
+SET pg_plan_advice.advice = 'SEQ_SCAN (x/y)';
+SET pg_plan_advice.advice = '  SEQ_SCAN ( x / y . z )  ';
+SET pg_plan_advice.advice = 'SEQ_SCAN("x"#2/"y"."z"@"t")';
+
+-- Syntax errors.
+SET pg_plan_advice.advice = 'SEQUENTIAL_SCAN(x)';
+SET pg_plan_advice.advice = 'SEQ_SCAN';
+SET pg_plan_advice.advice = 'SEQ_SCAN(';
+SET pg_plan_advice.advice = 'SEQ_SCAN("';
+SET pg_plan_advice.advice = 'SEQ_SCAN(#';
+SET pg_plan_advice.advice = '()';
+SET pg_plan_advice.advice = '123';
+
+-- Legal comments.
+SET pg_plan_advice.advice = '/**/';
+SET pg_plan_advice.advice = 'HASH_JOIN(_)/***/';
+SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(/*x*/y)';
+SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(y//*x*/z)';
+
+-- Unterminated comments.
+SET pg_plan_advice.advice = '/*';
+SET pg_plan_advice.advice = 'JOIN_ORDER("fOO") /* oops';
+
+-- Nested comments are not supported, so the first of these is legal and
+-- the second is not.
+SET pg_plan_advice.advice = '/*/*/';
+SET pg_plan_advice.advice = '/*/* stuff */*/';
+
+-- Foreign join requires multiple relation identifiers.
+SET pg_plan_advice.advice = 'FOREIGN_JOIN(a)';
+SET pg_plan_advice.advice = 'FOREIGN_JOIN((a))';
diff --git a/contrib/pg_plan_advice/t/001_regress.pl b/contrib/pg_plan_advice/t/001_regress.pl
new file mode 100644 (file)
index 0000000..dffafca
--- /dev/null
@@ -0,0 +1,139 @@
+# Copyright (c) 2021-2025, PostgreSQL Global Development Group
+
+# Run the core regression tests under pg_plan_advice to check for problems.
+use strict;
+use warnings FATAL => 'all';
+
+use Cwd            qw(abs_path);
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Create the cluster that the regression tests will be run against.
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init;
+
+# Settings for this cluster, and the reason for each:
+#
+# pg_plan_advice.shared_collection_limit is set so that the plan tree walker
+# code runs against every query in the regression tests. If any of those plan
+# trees can't be analyzed properly, this test should fail.
+#
+# pg_plan_advice.advice is an advice string that populates the advice trove
+# with a few entries of various sorts, none of which is expected to match
+# anything in the regression test queries. The planner hooks therefore get
+# called, which improves code coverage, without any plan actually changing.
+#
+# pg_plan_advice.always_explain_supplied_advice=false keeps regression test
+# queries that use EXPLAIN from breaking. Real-world users will probably want
+# EXPLAIN to show supplied advice, so that it's clear whether normal planner
+# behavior has been altered, but here that would be undesirable.
+my $extra_conf = <<EOM;
+pg_plan_advice.shared_collection_limit=1000000
+shared_preload_libraries=pg_plan_advice
+pg_plan_advice.advice='SEQ_SCAN(entirely_fictitious) HASH_JOIN(total_fabrication) GATHER(completely_imaginary)'
+pg_plan_advice.always_explain_supplied_advice=false
+EOM
+$node->append_conf('postgresql.conf', $extra_conf);
+$node->start;
+
+# Top of the source tree, found relative to the current working directory.
+my $srcdir = abs_path("../..");
+
+# pg_regress reads its input files from --inputdir and places its output
+# files in --outputdir.
+my $inputdir = "$srcdir/src/test/regress";
+my $outputdir = $PostgreSQL::Test::Utils::tmp_check;
+
+# Assemble the pg_regress command line piece by piece, then run the core
+# regression tests against our node using the parallel schedule. The pieces
+# are joined into a single string, so the command is run via the shell.
+my @regress_command = (
+       $ENV{PG_REGRESS},
+       "--bindir=",
+       "--host=" . $node->host,
+       "--port=" . $node->port,
+       "--schedule=$srcdir/src/test/regress/parallel_schedule",
+       "--max-concurrent-tests=20",
+       "--inputdir=\"$inputdir\"",
+       "--outputdir=\"$outputdir\"");
+my $rc = system(join(" ", @regress_command));
+
+# If the run failed and left a regression.diffs file behind, copy that file
+# into our own output so that the failure is easier to diagnose.
+my $diffs_path = "$outputdir/regression.diffs";
+if ($rc != 0 && -e $diffs_path)
+{
+       print "=== dumping $diffs_path ===\n";
+       print slurp_file($diffs_path);
+       print "=== EOF ===\n";
+}
+
+# The regression run itself must have succeeded.
+is($rc, 0, 'regression tests pass');
+
+# The extension must exist before we can query the collector.
+$node->safe_psql('postgres', 'CREATE EXTENSION pg_plan_advice');
+
+# The regression run should have left a large number of entries in the
+# shared advice collector.
+my $total_advice_sql = <<EOM;
+SELECT COUNT(*) FROM pg_get_collected_shared_advice();
+EOM
+my $total_advice = $node->safe_psql('postgres', $total_advice_sql);
+cmp_ok($total_advice, '>', 40000, "copious advice collected");
+
+# Those entries should also include many different advice strings, not just
+# the same few repeated over and over.
+my $distinct_advice_sql = <<EOM;
+SELECT COUNT(*) FROM
+       (SELECT DISTINCT advice FROM pg_get_collected_shared_advice());
+EOM
+my $distinct_advice = $node->safe_psql('postgres', $distinct_advice_sql);
+cmp_ok($distinct_advice, '>', 3000, "diverse advice collected");
+
+# We want to test for the presence of our known tags in the collected advice.
+# Put all tags into the hash that follows; map any tags that aren't tested
+# by the core regression tests to 0, and others to 1.
+my %tag_map = (
+       BITMAP_HEAP_SCAN => 1,
+       FOREIGN_JOIN => 0,
+       GATHER => 1,
+       GATHER_MERGE => 1,
+       HASH_JOIN => 1,
+       INDEX_ONLY_SCAN => 1,
+       INDEX_SCAN => 1,
+       JOIN_ORDER => 1,
+       MERGE_JOIN_MATERIALIZE => 1,
+       MERGE_JOIN_PLAIN => 1,
+       NESTED_LOOP_MATERIALIZE => 1,
+       NESTED_LOOP_MEMOIZE => 1,
+       NESTED_LOOP_PLAIN => 1,
+       NO_GATHER => 1,
+       PARTITIONWISE => 1,
+       SEMIJOIN_NON_UNIQUE => 1,
+       SEMIJOIN_UNIQUE => 1,
+       SEQ_SCAN => 1,
+       TID_SCAN => 1,
+);
+
+# Visit the tags in sorted order, so that the checks run and are reported in
+# the same order every time; each() would hand them back in arbitrary order.
+foreach my $tag (sort keys %tag_map)
+{
+       my $checkit = $tag_map{$tag};
+
+       # Count the advice strings that use the given tag. The tag must begin at
+       # the start of a word (\m) and be followed by an opening parenthesis, so
+       # that GATHER( is not counted when it is just the tail of NO_GATHER(. We
+       # use a regular expression rather than LIKE because the underscores in
+       # the tag names would act as single-character wildcards in a LIKE
+       # pattern. This is still not entirely robust: it could get thrown off by
+       # a table alias such as "FOREIGN_JOIN(", but that probably won't happen
+       # in the core regression tests.
+       #
+       # The backslashes are doubled because this here-document interpolates;
+       # the server sees \m and \(.
+       my $tag_count = $node->safe_psql('postgres', <<EOM);
+SELECT COUNT(*) FROM pg_get_collected_shared_advice()
+       WHERE advice ~ '\\m${tag}\\('
+EOM
+
+       # Check that the tag got a non-trivial amount of use, unless told otherwise.
+       cmp_ok($tag_count, '>', 10, "multiple uses of $tag") if $checkit;
+
+       # Regardless, note the exact count in the log, for human consumption.
+       note("found $tag_count advice strings containing $tag");
+}
+
+# Trigger a partial cleanup of the shared advice collector, and then a full
+# cleanup. Both statements are sent in a single safe_psql call, and nothing
+# is checked here beyond safe_psql itself succeeding.
+# NOTE(review): presumably lowering shared_collection_limit to 500 is what
+# causes the partial cleanup, and pg_clear_collected_shared_advice() the full
+# one -- confirm against the collector code.
+$node->safe_psql('postgres', <<EOM);
+SET pg_plan_advice.shared_collection_limit=500;
+SELECT * FROM pg_clear_collected_shared_advice();
+EOM
+
+done_testing();
index 4ff47115ca8d0a5db78462f8be799c051449ffe3..d1a7e5f8c463442fb29dda79af48988f41edfc32 100644 (file)
@@ -3928,6 +3928,43 @@ pg_wc_probefunc
 pg_wchar
 pg_wchar_tbl
 pgp_armor_headers_state
+pgpa_advice_item
+pgpa_advice_tag_type
+pgpa_advice_target
+pgpa_collected_advice
+pgpa_identifier
+pgpa_index_target
+pgpa_index_type
+pgpa_itm_type
+pgpa_join_class
+pgpa_join_member
+pgpa_join_state
+pgpa_join_strategy
+pgpa_join_unroller
+pgpa_local_advice
+pgpa_local_advice_chunk
+pgpa_output_context
+pgpa_plan_walker_context
+pgpa_planner_state
+pgpa_qf_type
+pgpa_query_feature
+pgpa_ri_checker
+pgpa_ri_checker_key
+pgpa_scan
+pgpa_scan_strategy
+pgpa_shared_advice
+pgpa_shared_advice_chunk
+pgpa_shared_state
+pgpa_target_type
+pgpa_trove
+pgpa_trove_entry
+pgpa_trove_entry_element
+pgpa_trove_entry_hash
+pgpa_trove_entry_key
+pgpa_trove_lookup_type
+pgpa_trove_result
+pgpa_trove_slice
+pgpa_unrolled_join
 pgsocket
 pgsql_thing_t
 pgssEntry