From 4568b1a8ac7f74f54720a413fd8cd9cda1be32a7 Mon Sep 17 00:00:00 2001
From: Antonin Houska <ah@cybertec.at>
Date: Fri, 6 Sep 2024 09:55:54 +0200
Subject: [PATCH 1/8] Adjust signature of cluster_rel() and its subroutines.

So far cluster_rel() received OID of the relation it should process and it
performed opening and locking of the relation itself. Yet copy_table_data()
received the OID as well and also had to open the relation itself. This patch
tries to eliminate the repeated opening and closing.

One particular reason for this change is that the VACUUM FULL / CLUSTER
command with the CONCURRENTLY option will need to release all locks on the
relation (and possibly on the clustering index) at some point. Since it makes
little sense to keep relation reference w/o lock, the cluster_rel() function
also closes its reference to the relation (and its index). Neither the
function nor its subroutines may open extra references because then it'd be a
bit harder to close them all.
---
 src/backend/commands/cluster.c   | 146 ++++++++++++++++++-------------
 src/backend/commands/matview.c   |   2 +-
 src/backend/commands/tablecmds.c |   2 +-
 src/backend/commands/vacuum.c    |  12 +--
 src/include/commands/cluster.h   |   5 +-
 5 files changed, 99 insertions(+), 68 deletions(-)

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 78f96789b0..bedc177ce4 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -70,8 +70,8 @@ typedef struct
 
 
 static void cluster_multiple_rels(List *rtcs, ClusterParams *params);
-static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
-static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
+static void rebuild_relation(Relation OldHeap, Relation index, bool verbose);
+static void copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex,
 							bool verbose, bool *pSwapToastByContent,
 							TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
 static List *get_tables_to_cluster(MemoryContext cluster_context);
@@ -194,11 +194,11 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
 
 		if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
 		{
-			/* close relation, keep lock till commit */
-			table_close(rel, NoLock);
-
-			/* Do the job. */
-			cluster_rel(tableOid, indexOid, &params);
+			/*
+			 * Do the job. (The function will close the relation, lock is kept
+			 * till commit.)
+			 */
+			cluster_rel(rel, indexOid, &params);
 
 			return;
 		}
@@ -275,6 +275,7 @@ cluster_multiple_rels(List *rtcs, ClusterParams *params)
 	foreach(lc, rtcs)
 	{
 		RelToCluster *rtc = (RelToCluster *) lfirst(lc);
+		Relation	rel;
 
 		/* Start a new transaction for each relation. */
 		StartTransactionCommand();
@@ -282,8 +283,13 @@ cluster_multiple_rels(List *rtcs, ClusterParams *params)
 		/* functions in indexes may want a snapshot set */
 		PushActiveSnapshot(GetTransactionSnapshot());
 
-		/* Do the job. */
-		cluster_rel(rtc->tableOid, rtc->indexOid, params);
+		rel = table_open(rtc->tableOid, AccessExclusiveLock);
+
+		/*
+		 * Do the job. (The function will close the relation, lock is kept
+		 * till commit.)
+		 */
+		cluster_rel(rel, rtc->indexOid, params);
 
 		PopActiveSnapshot();
 		CommitTransactionCommand();
@@ -306,16 +312,19 @@ cluster_multiple_rels(List *rtcs, ClusterParams *params)
  * If indexOid is InvalidOid, the table will be rewritten in physical order
  * instead of index order.  This is the new implementation of VACUUM FULL,
  * and error messages should refer to the operation as VACUUM not CLUSTER.
+ *
+ * We expect that OldHeap is already locked in AccessExclusiveLock mode.
  */
 void
-cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
+cluster_rel(Relation OldHeap, Oid indexOid, ClusterParams *params)
 {
-	Relation	OldHeap;
+	Oid			tableOid = RelationGetRelid(OldHeap);
 	Oid			save_userid;
 	int			save_sec_context;
 	int			save_nestlevel;
 	bool		verbose = ((params->options & CLUOPT_VERBOSE) != 0);
 	bool		recheck = ((params->options & CLUOPT_RECHECK) != 0);
+	Relation	index = NULL;
 
 	/* Check for user-requested abort. */
 	CHECK_FOR_INTERRUPTS();
@@ -328,21 +337,6 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
 		pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
 									 PROGRESS_CLUSTER_COMMAND_VACUUM_FULL);
 
-	/*
-	 * We grab exclusive access to the target rel and index for the duration
-	 * of the transaction.  (This is redundant for the single-transaction
-	 * case, since cluster() already did it.)  The index lock is taken inside
-	 * check_index_is_clusterable.
-	 */
-	OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
-
-	/* If the table has gone away, we can skip processing it */
-	if (!OldHeap)
-	{
-		pgstat_progress_end_command();
-		return;
-	}
-
 	/*
 	 * Switch to the table owner's userid, so that any index functions are run
 	 * as that user.  Also lock down security-restricted operations and
@@ -445,7 +439,11 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
 
 	/* Check heap and index are valid to cluster on */
 	if (OidIsValid(indexOid))
+	{
 		check_index_is_clusterable(OldHeap, indexOid, AccessExclusiveLock);
+		/* Open the index (It should already be locked.) */
+		index = index_open(indexOid, NoLock);
+	}
 
 	/*
 	 * Quietly ignore the request if this is a materialized view which has not
@@ -474,9 +472,12 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
 	TransferPredicateLocksToHeapRelation(OldHeap);
 
 	/* rebuild_relation does all the dirty work */
-	rebuild_relation(OldHeap, indexOid, verbose);
+	rebuild_relation(OldHeap, index, verbose);
 
-	/* NB: rebuild_relation does table_close() on OldHeap */
+	/*
+	 * NB: rebuild_relation does table_close() on OldHeap, and also on index,
+	 * if the pointer is valid.
+	 */
 
 out:
 	/* Roll back any GUC changes executed by index functions */
@@ -625,22 +626,27 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
  * rebuild_relation: rebuild an existing relation in index or physical order
  *
  * OldHeap: table to rebuild --- must be opened and exclusive-locked!
- * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
+ * index: index to cluster by, or NULL to rewrite in physical order. Must be
+ * opened and locked.
  *
- * NB: this routine closes OldHeap at the right time; caller should not.
+ * On exit, the heap (and also the index, if one was passed) are closed, but
+ * still locked with AccessExclusiveLock.
  */
 static void
-rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
+rebuild_relation(Relation OldHeap, Relation index, bool verbose)
 {
 	Oid			tableOid = RelationGetRelid(OldHeap);
+	Oid			indexOid = index ? RelationGetRelid(index) : InvalidOid;
 	Oid			accessMethod = OldHeap->rd_rel->relam;
 	Oid			tableSpace = OldHeap->rd_rel->reltablespace;
 	Oid			OIDNewHeap;
+	Relation	NewHeap;
 	char		relpersistence;
 	bool		is_system_catalog;
 	bool		swap_toast_by_content;
 	TransactionId frozenXid;
 	MultiXactId cutoffMulti;
+	LOCKMODE	lockmode_new;
 
 	if (OidIsValid(indexOid))
 		/* Mark the correct index as clustered */
@@ -650,19 +656,40 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
 	relpersistence = OldHeap->rd_rel->relpersistence;
 	is_system_catalog = IsSystemRelation(OldHeap);
 
-	/* Close relcache entry, but keep lock until transaction commit */
-	table_close(OldHeap, NoLock);
-
-	/* Create the transient table that will receive the re-ordered data */
+	/*
+	 * Create the transient table that will receive the re-ordered data.
+	 *
+	 * NoLock for the old heap because we already have it locked and want to
+	 * keep unlocking straightforward.
+	 */
+	lockmode_new = AccessExclusiveLock;
 	OIDNewHeap = make_new_heap(tableOid, tableSpace,
 							   accessMethod,
 							   relpersistence,
-							   AccessExclusiveLock);
+							   NoLock, &lockmode_new);
+	Assert(lockmode_new == AccessExclusiveLock || lockmode_new == NoLock);
+	/* Lock iff not done above. */
+	NewHeap = table_open(OIDNewHeap, lockmode_new == NoLock ?
+						 AccessExclusiveLock : NoLock);
 
 	/* Copy the heap data into the new table in the desired order */
-	copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
+	copy_table_data(NewHeap, OldHeap, index, verbose,
 					&swap_toast_by_content, &frozenXid, &cutoffMulti);
 
+
+	/* Close relcache entries, but keep lock until transaction commit */
+	table_close(OldHeap, NoLock);
+	if (index)
+		index_close(index, NoLock);
+
+	/*
+	 * Close the new relation so it can be dropped as soon as the storage is
+	 * swapped. The relation is not visible to others, so we could unlock it
+	 * completely, but it's simpler to pass NoLock than to track all the locks
+	 * acquired so far.
+	 */
+	table_close(NewHeap, NoLock);
+
 	/*
 	 * Swap the physical files of the target and transient tables, then
 	 * rebuild the target's indexes and throw away the transient table.
@@ -683,10 +710,15 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
  *
  * After this, the caller should load the new heap with transferred/modified
  * data, then call finish_heap_swap to complete the operation.
+ *
+ * If a specific lock mode is needed for the new relation, pass it via the
+ * in/out parameter lockmode_new_p. On exit, the output value tells whether
+ * the lock was actually acquired.
  */
 Oid
 make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
-			  char relpersistence, LOCKMODE lockmode)
+			  char relpersistence, LOCKMODE lockmode_old,
+			  LOCKMODE *lockmode_new_p)
 {
 	TupleDesc	OldHeapDesc;
 	char		NewHeapName[NAMEDATALEN];
@@ -697,8 +729,17 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
 	Datum		reloptions;
 	bool		isNull;
 	Oid			namespaceid;
+	LOCKMODE	lockmode_new;
 
-	OldHeap = table_open(OIDOldHeap, lockmode);
+	if (lockmode_new_p)
+	{
+		lockmode_new = *lockmode_new_p;
+		*lockmode_new_p = NoLock;
+	}
+	else
+		lockmode_new = lockmode_old;
+
+	OldHeap = table_open(OIDOldHeap, lockmode_old);
 	OldHeapDesc = RelationGetDescr(OldHeap);
 
 	/*
@@ -792,7 +833,9 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
 		if (isNull)
 			reloptions = (Datum) 0;
 
-		NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);
+		NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode_new, toastid);
+		if (lockmode_new_p)
+			*lockmode_new_p = lockmode_new;
 
 		ReleaseSysCache(tuple);
 	}
@@ -811,13 +854,13 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
  */
 static void
-copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
+copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, bool verbose,
 				bool *pSwapToastByContent, TransactionId *pFreezeXid,
 				MultiXactId *pCutoffMulti)
 {
-	Relation	NewHeap,
-				OldHeap,
-				OldIndex;
+	Oid		OIDOldHeap = RelationGetRelid(OldHeap);
+	Oid		OIDOldIndex = OldIndex ? RelationGetRelid(OldIndex) : InvalidOid;
+	Oid		OIDNewHeap = RelationGetRelid(NewHeap);
 	Relation	relRelation;
 	HeapTuple	reltup;
 	Form_pg_class relform;
@@ -836,16 +879,6 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
 
 	pg_rusage_init(&ru0);
 
-	/*
-	 * Open the relations we need.
-	 */
-	NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
-	OldHeap = table_open(OIDOldHeap, AccessExclusiveLock);
-	if (OidIsValid(OIDOldIndex))
-		OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
-	else
-		OldIndex = NULL;
-
 	/* Store a copy of the namespace name for logging purposes */
 	nspname = get_namespace_name(RelationGetNamespace(OldHeap));
 
@@ -1001,11 +1034,6 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
 					   tups_recently_dead,
 					   pg_rusage_show(&ru0))));
 
-	if (OldIndex != NULL)
-		index_close(OldIndex, NoLock);
-	table_close(OldHeap, NoLock);
-	table_close(NewHeap, NoLock);
-
 	/* Update pg_class to reflect the correct values of pages and tuples. */
 	relRelation = table_open(RelationRelationId, RowExclusiveLock);
 
diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c
index b2457f121a..7da6647f8f 100644
--- a/src/backend/commands/matview.c
+++ b/src/backend/commands/matview.c
@@ -318,7 +318,7 @@ RefreshMatViewByOid(Oid matviewOid, bool is_create, bool skipData,
 	 */
 	OIDNewHeap = make_new_heap(matviewOid, tableSpace,
 							   matviewRel->rd_rel->relam,
-							   relpersistence, ExclusiveLock);
+							   relpersistence, ExclusiveLock, NULL);
 	LockRelationOid(OIDNewHeap, AccessExclusiveLock);
 
 	/* Generate the data, if wanted. */
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index b3cc6f8f69..2b20b03224 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -5783,7 +5783,7 @@ ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode,
 			 * unlogged anyway.
 			 */
 			OIDNewHeap = make_new_heap(tab->relid, NewTableSpace, NewAccessMethod,
-									   persistence, lockmode);
+									   persistence, lockmode, NULL);
 
 			/*
 			 * Copy the heap data into the new table with the desired
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 7d8e9d2045..d32068b5d5 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -2193,15 +2193,17 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 		{
 			ClusterParams cluster_params = {0};
 
-			/* close relation before vacuuming, but hold lock until commit */
-			relation_close(rel, NoLock);
-			rel = NULL;
-
 			if ((params->options & VACOPT_VERBOSE) != 0)
 				cluster_params.options |= CLUOPT_VERBOSE;
 
 			/* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
-			cluster_rel(relid, InvalidOid, &cluster_params);
+			cluster_rel(rel, InvalidOid, &cluster_params);
+
+			/*
+			 * cluster_rel() should have closed the relation, lock is kept
+			 * till commit.
+			 */
+			rel = NULL;
 		}
 		else
 			table_relation_vacuum(rel, params, bstrategy);
diff --git a/src/include/commands/cluster.h b/src/include/commands/cluster.h
index 4e32380417..7492796ea2 100644
--- a/src/include/commands/cluster.h
+++ b/src/include/commands/cluster.h
@@ -32,13 +32,14 @@ typedef struct ClusterParams
 } ClusterParams;
 
 extern void cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel);
-extern void cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params);
+extern void cluster_rel(Relation OldHeap, Oid indexOid, ClusterParams *params);
 extern void check_index_is_clusterable(Relation OldHeap, Oid indexOid,
 									   LOCKMODE lockmode);
 extern void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal);
 
 extern Oid	make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
-						  char relpersistence, LOCKMODE lockmode);
+						  char relpersistence, LOCKMODE lockmode_old,
+						  LOCKMODE *lockmode_new_p);
 extern void finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
 							 bool is_system_catalog,
 							 bool swap_toast_by_content,
-- 
2.45.2

