8787#include "storage/bufmgr.h"
8888#include "storage/ipc.h"
8989#include "storage/latch.h"
90+ #include "storage/lmgr.h"
9091#include "storage/pmsignal.h"
9192#include "storage/proc.h"
9293#include "storage/procsignal.h"
@@ -130,17 +131,6 @@ int Log_autovacuum_min_duration = -1;
130131#define MIN_AUTOVAC_SLEEPTIME 100.0 /* milliseconds */
131132#define MAX_AUTOVAC_SLEEPTIME 300 /* seconds */
132133
133- /*
134- * Maximum number of orphan temporary tables to drop in a single transaction.
135- * (If this is too high, we might run out of heavyweight locks.)
136- */
137- #define MAX_ORPHAN_ITEMS 50
138-
139- /*
140- * After this many failures, stop trying to drop orphan temporary tables.
141- */
142- #define MAX_ORPHAN_DROP_FAILURE 10
143-
144134/* Flags to tell if we are in an autovacuum process */
145135static bool am_autovacuum_launcher = false;
146136static bool am_autovacuum_worker = false;
@@ -1899,7 +1889,6 @@ do_autovacuum(void)
18991889 Form_pg_database dbForm ;
19001890 List * table_oids = NIL ;
19011891 List * orphan_oids = NIL ;
1902- List * pending_oids = NIL ;
19031892 HASHCTL ctl ;
19041893 HTAB * table_toast_map ;
19051894 ListCell * volatile cell ;
@@ -1908,7 +1897,6 @@ do_autovacuum(void)
19081897 BufferAccessStrategy bstrategy ;
19091898 ScanKeyData key ;
19101899 TupleDesc pg_class_desc ;
1911- int orphan_failures = 0 ;
19121900 int effective_multixact_freeze_max_age ;
19131901
19141902 /*
@@ -2000,7 +1988,7 @@ do_autovacuum(void)
20001988 * TOAST tables. The reason for doing the second pass is that during it we
20011989 * want to use the main relation's pg_class.reloptions entry if the TOAST
20021990 * table does not have any, and we cannot obtain it unless we know
2003- * beforehand what's the main table OID.
1991+ * beforehand what's the main table OID.
20041992 *
20051993 * We need to check TOAST tables separately because in cases with short,
20061994 * wide tables there might be proportionally much more activity in the
@@ -2028,16 +2016,6 @@ do_autovacuum(void)
20282016
20292017 relid = HeapTupleGetOid (tuple );
20302018
2031- /* Fetch reloptions and the pgstat entry for this table */
2032- relopts = extract_autovac_opts (tuple , pg_class_desc );
2033- tabentry = get_pgstat_tabentry_relid (relid , classForm -> relisshared ,
2034- shared , dbentry );
2035-
2036- /* Check if it needs vacuum or analyze */
2037- relation_needs_vacanalyze (relid , relopts , classForm , tabentry ,
2038- effective_multixact_freeze_max_age ,
2039- & dovacuum , & doanalyze , & wraparound );
2040-
20412019 /*
20422020 * Check if it is a temp table (presumably, of some other backend's).
20432021 * We cannot safely process other backends' temp tables.
@@ -2049,47 +2027,60 @@ do_autovacuum(void)
20492027 backendID = GetTempNamespaceBackendId (classForm -> relnamespace );
20502028
20512029 /* We just ignore it if the owning backend is still active */
2052- if (backendID == MyBackendId || BackendIdGetProc (backendID ) == NULL )
2030+ if (backendID != InvalidBackendId &&
2031+ (backendID == MyBackendId ||
2032+ BackendIdGetProc (backendID ) == NULL ))
20532033 {
20542034 /*
2055- * We found an orphan temp table which was probably left
2056- * behind by a crashed backend. Remember it, so we can attempt
2057- * to drop it.
2035+ * The table seems to be orphaned -- although it might be that
2036+ * the owning backend has already deleted it and exited; our
2037+ * pg_class scan snapshot is not necessarily up-to-date
2038+ * anymore, so we could be looking at a committed-dead entry.
2039+ * Remember it so we can try to delete it later.
20582040 */
20592041 orphan_oids = lappend_oid (orphan_oids , relid );
20602042 }
2043+ continue ;
20612044 }
2062- else
2063- {
2064- /* relations that need work are added to table_oids */
2065- if (dovacuum || doanalyze )
2066- table_oids = lappend_oid (table_oids , relid );
20672045
2068- /*
2069- * Remember the association for the second pass. Note: we must do
2070- * this even if the table is going to be vacuumed, because we
2071- * don't automatically vacuum toast tables along the parent table.
2072- */
2073- if (OidIsValid (classForm -> reltoastrelid ))
2074- {
2075- av_relation * hentry ;
2076- bool found ;
2046+ /* Fetch reloptions and the pgstat entry for this table */
2047+ relopts = extract_autovac_opts (tuple , pg_class_desc );
2048+ tabentry = get_pgstat_tabentry_relid (relid , classForm -> relisshared ,
2049+ shared , dbentry );
2050+
2051+ /* Check if it needs vacuum or analyze */
2052+ relation_needs_vacanalyze (relid , relopts , classForm , tabentry ,
2053+ effective_multixact_freeze_max_age ,
2054+ & dovacuum , & doanalyze , & wraparound );
2055+
2056+ /* Relations that need work are added to table_oids */
2057+ if (dovacuum || doanalyze )
2058+ table_oids = lappend_oid (table_oids , relid );
2059+
2060+ /*
2061+ * Remember TOAST associations for the second pass. Note: we must do
2062+ * this whether or not the table is going to be vacuumed, because we
2063+ * don't automatically vacuum toast tables along the parent table.
2064+ */
2065+ if (OidIsValid (classForm -> reltoastrelid ))
2066+ {
2067+ av_relation * hentry ;
2068+ bool found ;
20772069
2078- hentry = hash_search (table_toast_map ,
2079- & classForm -> reltoastrelid ,
2080- HASH_ENTER , & found );
2070+ hentry = hash_search (table_toast_map ,
2071+ & classForm -> reltoastrelid ,
2072+ HASH_ENTER , & found );
20812073
2082- if (!found )
2074+ if (!found )
2075+ {
2076+ /* hash_search already filled in the key */
2077+ hentry -> ar_relid = relid ;
2078+ hentry -> ar_hasrelopts = false;
2079+ if (relopts != NULL )
20832080 {
2084- /* hash_search already filled in the key */
2085- hentry -> ar_relid = relid ;
2086- hentry -> ar_hasrelopts = false;
2087- if (relopts != NULL )
2088- {
2089- hentry -> ar_hasrelopts = true;
2090- memcpy (& hentry -> ar_reloptions , relopts ,
2091- sizeof (AutoVacOpts ));
2092- }
2081+ hentry -> ar_hasrelopts = true;
2082+ memcpy (& hentry -> ar_reloptions , relopts ,
2083+ sizeof (AutoVacOpts ));
20932084 }
20942085 }
20952086 }
@@ -2154,112 +2145,90 @@ do_autovacuum(void)
21542145 heap_close (classRel , AccessShareLock );
21552146
21562147 /*
2157- * Loop through orphan temporary tables and drop them in batches. If
2158- * we're unable to drop one particular table, we'll retry to see if we
2159- * can drop others, but if we fail too many times we'll give up and proceed
2160- * with our regular work, so that this step hopefully can't wedge
2161- * autovacuum for too long.
2148+ * Recheck orphan temporary tables, and if they still seem orphaned, drop
2149+ * them. We'll eat a transaction per dropped table, which might seem
2150+ * excessive, but we should only need to do anything as a result of a
2151+ * previous backend crash, so this should not happen often enough to
2152+ * justify "optimizing". Using separate transactions ensures that we
2153+ * don't bloat the lock table if there are many temp tables to be dropped,
2154+ * and it ensures that we don't lose work if a deletion attempt fails.
21622155 */
2163- while (list_length (orphan_oids ) > 0 &&
2164- orphan_failures < MAX_ORPHAN_DROP_FAILURE )
2156+ foreach (cell , orphan_oids )
21652157 {
2166- Oid relid = linitial_oid ( orphan_oids );
2167- ObjectAddress object ;
2168- char * namespace = get_namespace_name ( get_rel_namespace ( relid )) ;
2169- char * relname = get_rel_name ( relid ) ;
2158+ Oid relid = lfirst_oid ( cell );
2159+ Form_pg_class classForm ;
2160+ int backendID ;
2161+ ObjectAddress object ;
21702162
2171- orphan_oids = list_delete_first (orphan_oids );
2163+ /*
2164+ * Check for user-requested abort.
2165+ */
2166+ CHECK_FOR_INTERRUPTS ();
21722167
2173- PG_TRY ();
2174- {
2175- ereport (LOG ,
2176- (errmsg ("autovacuum: dropping orphan temp table \"%s\".\"%s\" in database \"%s\"" ,
2177- namespace , relname ,
2178- get_database_name (MyDatabaseId ))));
2179- object .classId = RelationRelationId ;
2180- object .objectId = relid ;
2181- object .objectSubId = 0 ;
2182- performDeletion (& object , DROP_CASCADE , PERFORM_DELETION_INTERNAL );
2168+ /*
2169+ * Try to lock the table. If we can't get the lock immediately,
2170+ * somebody else is using (or dropping) the table, so it's not our
2171+ * concern anymore. Having the lock prevents race conditions below.
2172+ */
2173+ if (!ConditionalLockRelationOid (relid , AccessExclusiveLock ))
2174+ continue ;
21832175
2184- /*
2185- * This orphan table has been dropped correctly, add it to the
2186- * list of tables whose drop should be attempted again if an
2187- * error after in the same transaction.
2188- */
2189- pending_oids = lappend_oid (pending_oids , relid );
2190- }
2191- PG_CATCH ();
2176+ /*
2177+ * Re-fetch the pg_class tuple and re-check whether it still seems to
2178+ * be an orphaned temp table. If it's not there or no longer the same
2179+ * relation, ignore it.
2180+ */
2181+ tuple = SearchSysCacheCopy1 (RELOID , ObjectIdGetDatum (relid ));
2182+ if (!HeapTupleIsValid (tuple ))
21922183 {
2193- /* Abort the current transaction. */
2194- HOLD_INTERRUPTS ();
2195-
2196- errcontext ("dropping of orphan temp table \"%s\".\"%s\" in database \"%s\"" ,
2197- namespace , relname ,
2198- get_database_name (MyDatabaseId ));
2199-
2200- EmitErrorReport ();
2201-
2202- /* this resets the PGXACT flags too */
2203- AbortOutOfAnyTransaction ();
2204- FlushErrorState ();
2205-
2206- /*
2207- * Any tables were succesfully dropped before the failure now
2208- * need to be dropped again. Add them back into the list, but
2209- * don't retry the table that failed.
2210- */
2211- orphan_oids = list_concat (orphan_oids , pending_oids );
2212- orphan_failures ++ ;
2213-
2214- /* Start a new transaction. */
2215- StartTransactionCommand ();
2216-
2217- /* StartTransactionCommand changed elsewhere the memory context */
2218- MemoryContextSwitchTo (AutovacMemCxt );
2219-
2220- RESUME_INTERRUPTS ();
2184+ /* be sure to drop useless lock so we don't bloat lock table */
2185+ UnlockRelationOid (relid , AccessExclusiveLock );
2186+ continue ;
22212187 }
2222- PG_END_TRY ( );
2188+ classForm = ( Form_pg_class ) GETSTRUCT ( tuple );
22232189
22242190 /*
2225- * If we've successfully dropped quite a few tables, commit the
2226- * transaction and begin a new one. The main point of this is to
2227- * avoid accumulating too many locks and blowing out the lock table,
2228- * but it also minimizes the amount of work that will have to be rolled
2229- * back if we fail to drop some table later in the list.
2191+ * Make all the same tests made in the loop above. In event of OID
2192+ * counter wraparound, the pg_class entry we have now might be
2193+ * completely unrelated to the one we saw before.
22302194 */
2231- if (list_length (pending_oids ) >= MAX_ORPHAN_ITEMS )
2195+ if (!((classForm -> relkind == RELKIND_RELATION ||
2196+ classForm -> relkind == RELKIND_MATVIEW ) &&
2197+ classForm -> relpersistence == RELPERSISTENCE_TEMP ))
22322198 {
2233- CommitTransactionCommand ();
2234- StartTransactionCommand ();
2235-
2236- /* StartTransactionCommand changed elsewhere */
2237- MemoryContextSwitchTo (AutovacMemCxt );
2238-
2239- list_free (pending_oids );
2240- pending_oids = NIL ;
2199+ UnlockRelationOid (relid , AccessExclusiveLock );
2200+ continue ;
2201+ }
2202+ backendID = GetTempNamespaceBackendId (classForm -> relnamespace );
2203+ if (!(backendID != InvalidBackendId &&
2204+ (backendID == MyBackendId ||
2205+ BackendIdGetProc (backendID ) == NULL )))
2206+ {
2207+ UnlockRelationOid (relid , AccessExclusiveLock );
2208+ continue ;
22412209 }
22422210
2243- pfree (relname );
2244- pfree (namespace );
2245- }
2211+ /* OK, let's delete it */
2212+ ereport (LOG ,
2213+ (errmsg ("autovacuum: dropping orphan temp table \"%s.%s.%s\"" ,
2214+ get_database_name (MyDatabaseId ),
2215+ get_namespace_name (classForm -> relnamespace ),
2216+ NameStr (classForm -> relname ))));
22462217
2247- /*
2248- * Commit current transaction to finish the cleanup done previously and
2249- * restart a new one to not bloat the activity of the following steps.
2250- * This needs to happen only if there are any items thought as previously
2251- * pending, but are actually not as the last transaction doing the cleanup
2252- * has been successful.
2253- */
2254- if ( list_length ( pending_oids ) > 0 )
2255- {
2218+ object . classId = RelationRelationId ;
2219+ object . objectId = relid ;
2220+ object . objectSubId = 0 ;
2221+ performDeletion ( & object , DROP_CASCADE , PERFORM_DELETION_INTERNAL );
2222+
2223+ /*
2224+ * To commit the deletion, end current transaction and start a new
2225+ * one. Note this also releases the lock we took.
2226+ */
22562227 CommitTransactionCommand ();
22572228 StartTransactionCommand ();
22582229
2259- /* StartTransactionCommand changed elsewhere */
2230+ /* StartTransactionCommand changed current memory context */
22602231 MemoryContextSwitchTo (AutovacMemCxt );
2261-
2262- list_free (pending_oids );
22632232 }
22642233
22652234 /*
0 commit comments