@@ -147,6 +147,8 @@ static int32 partition_bound_cmp(PartitionKey key,
147147static int partition_bound_bsearch (PartitionKey key ,
148148 PartitionBoundInfo boundinfo ,
149149 void * probe , bool probe_is_bound , bool * is_equal );
150+ static void get_partition_dispatch_recurse (Relation rel , Relation parent ,
151+ List * * pds , List * * leaf_part_oids );
150152
151153/*
152154 * RelationBuildPartitionDesc
@@ -1191,21 +1193,6 @@ get_partition_qual_relid(Oid relid)
11911193 return result ;
11921194}
11931195
1194- /*
1195- * Append OIDs of rel's partitions to the list 'partoids' and for each OID,
1196- * append pointer rel to the list 'parents'.
1197- */
1198- #define APPEND_REL_PARTITION_OIDS (rel , partoids , parents ) \
1199- do\
1200- {\
1201- int i;\
1202- for (i = 0; i < (rel)->rd_partdesc->nparts; i++)\
1203- {\
1204- (partoids) = lappend_oid((partoids), (rel)->rd_partdesc->oids[i]);\
1205- (parents) = lappend((parents), (rel));\
1206- }\
1207- } while(0)
1208-
12091196/*
12101197 * RelationGetPartitionDispatchInfo
12111198 * Returns information necessary to route tuples down a partition tree
@@ -1222,151 +1209,130 @@ PartitionDispatch *
12221209RelationGetPartitionDispatchInfo (Relation rel ,
12231210 int * num_parted , List * * leaf_part_oids )
12241211{
1212+ List * pdlist = NIL ;
12251213 PartitionDispatchData * * pd ;
1226- List * all_parts = NIL ,
1227- * all_parents = NIL ,
1228- * parted_rels ,
1229- * parted_rel_parents ;
1230- ListCell * lc1 ,
1231- * lc2 ;
1232- int i ,
1233- k ,
1234- offset ;
1214+ ListCell * lc ;
1215+ int i ;
12351216
1236- /*
1237- * We rely on the relcache to traverse the partition tree to build both
1238- * the leaf partition OIDs list and the array of PartitionDispatch objects
1239- * for the partitioned tables in the tree. That means every partitioned
1240- * table in the tree must be locked, which is fine since we require the
1241- * caller to lock all the partitions anyway.
1242- *
1243- * For every partitioned table in the tree, starting with the root
1244- * partitioned table, add its relcache entry to parted_rels, while also
1245- * queuing its partitions (in the order in which they appear in the
1246- * partition descriptor) to be looked at later in the same loop. This is
1247- * a bit tricky but works because the foreach() macro doesn't fetch the
1248- * next list element until the bottom of the loop.
1249- */
1250- * num_parted = 1 ;
1251- parted_rels = list_make1 (rel );
1252- /* Root partitioned table has no parent, so NULL for parent */
1253- parted_rel_parents = list_make1 (NULL );
1254- APPEND_REL_PARTITION_OIDS (rel , all_parts , all_parents );
1255- forboth (lc1 , all_parts , lc2 , all_parents )
1217+ Assert (rel -> rd_rel -> relkind == RELKIND_PARTITIONED_TABLE );
1218+
1219+ * num_parted = 0 ;
1220+ * leaf_part_oids = NIL ;
1221+
1222+ get_partition_dispatch_recurse (rel , NULL , & pdlist , leaf_part_oids );
1223+ * num_parted = list_length (pdlist );
1224+ pd = (PartitionDispatchData * * ) palloc (* num_parted *
1225+ sizeof (PartitionDispatchData * ));
1226+ i = 0 ;
1227+ foreach (lc , pdlist )
12561228 {
1257- Oid partrelid = lfirst_oid ( lc1 );
1258- Relation parent = lfirst ( lc2 );
1229+ pd [ i ++ ] = lfirst ( lc );
1230+ }
12591231
1260- if (get_rel_relkind (partrelid ) == RELKIND_PARTITIONED_TABLE )
1261- {
1262- /*
1263- * Already locked by the caller. Note that it is the
1264- * responsibility of the caller to close the below relcache entry,
1265- * once done using the information being collected here (for
1266- * example, in ExecEndModifyTable).
1267- */
1268- Relation partrel = heap_open (partrelid , NoLock );
1232+ return pd ;
1233+ }
12691234
1270- (* num_parted )++ ;
1271- parted_rels = lappend (parted_rels , partrel );
1272- parted_rel_parents = lappend (parted_rel_parents , parent );
1273- APPEND_REL_PARTITION_OIDS (partrel , all_parts , all_parents );
1274- }
1235+ /*
1236+ * get_partition_dispatch_recurse
1237+ * Recursively expand partition tree rooted at rel
1238+ *
1239+ * As the partition tree is expanded in a depth-first manner, we mantain two
1240+ * global lists: of PartitionDispatch objects corresponding to partitioned
1241+ * tables in *pds and of the leaf partition OIDs in *leaf_part_oids.
1242+ *
1243+ * Note that the order of OIDs of leaf partitions in leaf_part_oids matches
1244+ * the order in which the planner's expand_partitioned_rtentry() processes
1245+ * them. It's not necessarily the case that the offsets match up exactly,
1246+ * because constraint exclusion might prune away some partitions on the
1247+ * planner side, whereas we'll always have the complete list; but unpruned
1248+ * partitions will appear in the same order in the plan as they are returned
1249+ * here.
1250+ */
1251+ static void
1252+ get_partition_dispatch_recurse (Relation rel , Relation parent ,
1253+ List * * pds , List * * leaf_part_oids )
1254+ {
1255+ TupleDesc tupdesc = RelationGetDescr (rel );
1256+ PartitionDesc partdesc = RelationGetPartitionDesc (rel );
1257+ PartitionKey partkey = RelationGetPartitionKey (rel );
1258+ PartitionDispatch pd ;
1259+ int i ;
1260+
1261+ check_stack_depth ();
1262+
1263+ /* Build a PartitionDispatch for this table and add it to *pds. */
1264+ pd = (PartitionDispatch ) palloc (sizeof (PartitionDispatchData ));
1265+ * pds = lappend (* pds , pd );
1266+ pd -> reldesc = rel ;
1267+ pd -> key = partkey ;
1268+ pd -> keystate = NIL ;
1269+ pd -> partdesc = partdesc ;
1270+ if (parent != NULL )
1271+ {
1272+ /*
1273+ * For every partitioned table other than the root, we must store a
1274+ * tuple table slot initialized with its tuple descriptor and a tuple
1275+ * conversion map to convert a tuple from its parent's rowtype to its
1276+ * own. That is to make sure that we are looking at the correct row
1277+ * using the correct tuple descriptor when computing its partition key
1278+ * for tuple routing.
1279+ */
1280+ pd -> tupslot = MakeSingleTupleTableSlot (tupdesc );
1281+ pd -> tupmap = convert_tuples_by_name (RelationGetDescr (parent ),
1282+ tupdesc ,
1283+ gettext_noop ("could not convert row type" ));
1284+ }
1285+ else
1286+ {
1287+ /* Not required for the root partitioned table */
1288+ pd -> tupslot = NULL ;
1289+ pd -> tupmap = NULL ;
12751290 }
12761291
12771292 /*
1278- * We want to create two arrays - one for leaf partitions and another for
1279- * partitioned tables (including the root table and internal partitions).
1280- * While we only create the latter here, leaf partition array of suitable
1281- * objects (such as, ResultRelInfo) is created by the caller using the
1282- * list of OIDs we return. Indexes into these arrays get assigned in a
1283- * breadth-first manner, whereby partitions of any given level are placed
1284- * consecutively in the respective arrays.
1293+ * Go look at each partition of this table. If it's a leaf partition,
1294+ * simply add its OID to *leaf_part_oids. If it's a partitioned table,
1295+ * recursively call get_partition_dispatch_recurse(), so that its
1296+ * partitions are processed as well and a corresponding PartitionDispatch
1297+ * object gets added to *pds.
1298+ *
1299+ * About the values in pd->indexes: for a leaf partition, it contains the
1300+ * leaf partition's position in the global list *leaf_part_oids minus 1,
1301+ * whereas for a partitioned table partition, it contains the partition's
1302+ * position in the global list *pds multiplied by -1. The latter is
1303+ * multiplied by -1 to distinguish partitioned tables from leaf partitions
1304+ * when going through the values in pd->indexes. So, for example, when
1305+ * using it during tuple-routing, encountering a value >= 0 means we found
1306+ * a leaf partition. It is immediately returned as the index in the array
1307+ * of ResultRelInfos of all the leaf partitions, using which we insert the
1308+ * tuple into that leaf partition. A negative value means we found a
1309+ * partitioned table. The value multiplied by -1 is returned as the index
1310+ * in the array of PartitionDispatch objects of all partitioned tables in
1311+ * the tree. This value is used to continue the search in the next level
1312+ * of the partition tree.
12851313 */
1286- pd = (PartitionDispatchData * * ) palloc (* num_parted *
1287- sizeof (PartitionDispatchData * ));
1288- * leaf_part_oids = NIL ;
1289- i = k = offset = 0 ;
1290- forboth (lc1 , parted_rels , lc2 , parted_rel_parents )
1314+ pd -> indexes = (int * ) palloc (partdesc -> nparts * sizeof (int ));
1315+ for (i = 0 ; i < partdesc -> nparts ; i ++ )
12911316 {
1292- Relation partrel = lfirst (lc1 );
1293- Relation parent = lfirst (lc2 );
1294- PartitionKey partkey = RelationGetPartitionKey (partrel );
1295- TupleDesc tupdesc = RelationGetDescr (partrel );
1296- PartitionDesc partdesc = RelationGetPartitionDesc (partrel );
1297- int j ,
1298- m ;
1299-
1300- pd [i ] = (PartitionDispatch ) palloc (sizeof (PartitionDispatchData ));
1301- pd [i ]-> reldesc = partrel ;
1302- pd [i ]-> key = partkey ;
1303- pd [i ]-> keystate = NIL ;
1304- pd [i ]-> partdesc = partdesc ;
1305- if (parent != NULL )
1317+ Oid partrelid = partdesc -> oids [i ];
1318+
1319+ if (get_rel_relkind (partrelid ) != RELKIND_PARTITIONED_TABLE )
13061320 {
1307- /*
1308- * For every partitioned table other than root, we must store a
1309- * tuple table slot initialized with its tuple descriptor and a
1310- * tuple conversion map to convert a tuple from its parent's
1311- * rowtype to its own. That is to make sure that we are looking at
1312- * the correct row using the correct tuple descriptor when
1313- * computing its partition key for tuple routing.
1314- */
1315- pd [i ]-> tupslot = MakeSingleTupleTableSlot (tupdesc );
1316- pd [i ]-> tupmap = convert_tuples_by_name (RelationGetDescr (parent ),
1317- tupdesc ,
1318- gettext_noop ("could not convert row type" ));
1321+ * leaf_part_oids = lappend_oid (* leaf_part_oids , partrelid );
1322+ pd -> indexes [i ] = list_length (* leaf_part_oids ) - 1 ;
13191323 }
13201324 else
13211325 {
1322- /* Not required for the root partitioned table */
1323- pd [i ]-> tupslot = NULL ;
1324- pd [i ]-> tupmap = NULL ;
1325- }
1326- pd [i ]-> indexes = (int * ) palloc (partdesc -> nparts * sizeof (int ));
1327-
1328- /*
1329- * Indexes corresponding to the internal partitions are multiplied by
1330- * -1 to distinguish them from those of leaf partitions. Encountering
1331- * an index >= 0 means we found a leaf partition, which is immediately
1332- * returned as the partition we are looking for. A negative index
1333- * means we found a partitioned table, whose PartitionDispatch object
1334- * is located at the above index multiplied back by -1. Using the
1335- * PartitionDispatch object, search is continued further down the
1336- * partition tree.
1337- */
1338- m = 0 ;
1339- for (j = 0 ; j < partdesc -> nparts ; j ++ )
1340- {
1341- Oid partrelid = partdesc -> oids [j ];
1326+ /*
1327+ * We assume all tables in the partition tree were already locked
1328+ * by the caller.
1329+ */
1330+ Relation partrel = heap_open (partrelid , NoLock );
13421331
1343- if (get_rel_relkind (partrelid ) != RELKIND_PARTITIONED_TABLE )
1344- {
1345- * leaf_part_oids = lappend_oid (* leaf_part_oids , partrelid );
1346- pd [i ]-> indexes [j ] = k ++ ;
1347- }
1348- else
1349- {
1350- /*
1351- * offset denotes the number of partitioned tables of upper
1352- * levels including those of the current level. Any partition
1353- * of this table must belong to the next level and hence will
1354- * be placed after the last partitioned table of this level.
1355- */
1356- pd [i ]-> indexes [j ] = - (1 + offset + m );
1357- m ++ ;
1358- }
1332+ pd -> indexes [i ] = - list_length (* pds );
1333+ get_partition_dispatch_recurse (partrel , rel , pds , leaf_part_oids );
13591334 }
1360- i ++ ;
1361-
1362- /*
1363- * This counts the number of partitioned tables at upper levels
1364- * including those of the current level.
1365- */
1366- offset += m ;
13671335 }
1368-
1369- return pd ;
13701336}
13711337
13721338/* Module-local functions */
0 commit comments