summaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorAlvaro Herrera2021-03-25 21:00:28 +0000
committerAlvaro Herrera2021-03-25 21:00:28 +0000
commit71f4c8c6f74ba021e55d35b1128d22fb8c6e1629 (patch)
treec53d5e70ef2c8ec1723c9fb62fc8174ba6381e29 /src/backend
parent650d623530c884c087c565f1d3b8cd76f8fe2b95 (diff)
ALTER TABLE ... DETACH PARTITION ... CONCURRENTLY
Allow a partition be detached from its partitioned table without blocking concurrent queries, by running in two transactions and only requiring ShareUpdateExclusive in the partitioned table. Because it runs in two transactions, it cannot be used in a transaction block. This is the main reason to use dedicated syntax: so that users can choose to use the original mode if they need it. But also, it doesn't work when a default partition exists (because an exclusive lock would still need to be obtained on it, in order to change its partition constraint.) In case the second transaction is cancelled or a crash occurs, there's ALTER TABLE .. DETACH PARTITION .. FINALIZE, which executes the final steps. The main trick to make this work is the addition of column pg_inherits.inhdetachpending, initially false; can only be set true in the first part of this command. Once that is committed, concurrent transactions that use a PartitionDirectory will include or ignore partitions so marked: in optimizer they are ignored if the row is marked committed for the snapshot; in executor they are always included. As a result, and because of the way PartitionDirectory caches partition descriptors, queries that were planned before the detach will see the rows in the detached partition and queries that are planned after the detach, won't. A CHECK constraint is created that duplicates the partition constraint. This is probably not strictly necessary, and some users will prefer to remove it afterwards, but if the partition is re-attached to a partitioned table, the constraint needn't be rechecked. Author: Álvaro Herrera <[email protected]> Reviewed-by: Amit Langote <[email protected]> Reviewed-by: Justin Pryzby <[email protected]> Discussion: https://postgr.es/m/[email protected]
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/catalog/heap.c20
-rw-r--r--src/backend/catalog/index.c4
-rw-r--r--src/backend/catalog/partition.c38
-rw-r--r--src/backend/catalog/pg_inherits.c114
-rw-r--r--src/backend/commands/indexcmds.c6
-rw-r--r--src/backend/commands/tablecmds.c550
-rw-r--r--src/backend/commands/trigger.c5
-rw-r--r--src/backend/executor/execPartition.c29
-rw-r--r--src/backend/nodes/copyfuncs.c1
-rw-r--r--src/backend/nodes/equalfuncs.c1
-rw-r--r--src/backend/optimizer/util/plancat.c8
-rw-r--r--src/backend/parser/gram.y23
-rw-r--r--src/backend/partitioning/partbounds.c6
-rw-r--r--src/backend/partitioning/partdesc.c21
-rw-r--r--src/backend/tcop/utility.c19
-rw-r--r--src/backend/utils/adt/ri_triggers.c6
-rw-r--r--src/backend/utils/cache/partcache.c12
17 files changed, 714 insertions, 149 deletions
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index d0ec44bb40e..9f6303266f9 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -1923,7 +1923,12 @@ heap_drop_with_catalog(Oid relid)
elog(ERROR, "cache lookup failed for relation %u", relid);
if (((Form_pg_class) GETSTRUCT(tuple))->relispartition)
{
- parentOid = get_partition_parent(relid);
+ /*
+ * We have to lock the parent if the partition is being detached,
+ * because it's possible that some query still has a partition
+ * descriptor that includes this partition.
+ */
+ parentOid = get_partition_parent(relid, true);
LockRelationOid(parentOid, AccessExclusiveLock);
/*
@@ -2559,10 +2564,12 @@ StoreConstraints(Relation rel, List *cooked_constraints, bool is_internal)
* Returns a list of CookedConstraint nodes that shows the cooked form of
* the default and constraint expressions added to the relation.
*
- * NB: caller should have opened rel with AccessExclusiveLock, and should
- * hold that lock till end of transaction. Also, we assume the caller has
- * done a CommandCounterIncrement if necessary to make the relation's catalog
- * tuples visible.
+ * NB: caller should have opened rel with some self-conflicting lock mode,
+ * and should hold that lock till end of transaction; for normal cases that'll
+ * be AccessExclusiveLock, but if caller knows that the constraint is already
+ * enforced by some other means, it can be ShareUpdateExclusiveLock. Also, we
+ * assume the caller has done a CommandCounterIncrement if necessary to make
+ * the relation's catalog tuples visible.
*/
List *
AddRelationNewConstraints(Relation rel,
@@ -3831,7 +3838,8 @@ StorePartitionBound(Relation rel, Relation parent, PartitionBoundSpec *bound)
* relcache entry for that partition every time a partition is added or
* removed.
*/
- defaultPartOid = get_default_oid_from_partdesc(RelationGetPartitionDesc(parent));
+ defaultPartOid =
+ get_default_oid_from_partdesc(RelationGetPartitionDesc(parent, false));
if (OidIsValid(defaultPartOid))
CacheInvalidateRelcacheByRelid(defaultPartOid);
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 397d70d2269..af30840c044 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -1837,7 +1837,7 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName)
List *ancestors = get_partition_ancestors(oldIndexId);
Oid parentIndexRelid = linitial_oid(ancestors);
- DeleteInheritsTuple(oldIndexId, parentIndexRelid);
+ DeleteInheritsTuple(oldIndexId, parentIndexRelid, false, NULL);
StoreSingleInheritance(newIndexId, parentIndexRelid, 1);
list_free(ancestors);
@@ -2487,7 +2487,7 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
/*
* fix INHERITS relation
*/
- DeleteInheritsTuple(indexId, InvalidOid);
+ DeleteInheritsTuple(indexId, InvalidOid, false, NULL);
/*
* We are presently too lazy to attempt to compute the new correct value
diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
index af7754d6ab7..790f4ccb927 100644
--- a/src/backend/catalog/partition.c
+++ b/src/backend/catalog/partition.c
@@ -32,7 +32,8 @@
#include "utils/rel.h"
#include "utils/syscache.h"
-static Oid get_partition_parent_worker(Relation inhRel, Oid relid);
+static Oid get_partition_parent_worker(Relation inhRel, Oid relid,
+ bool *detach_pending);
static void get_partition_ancestors_worker(Relation inhRel, Oid relid,
List **ancestors);
@@ -42,23 +43,32 @@ static void get_partition_ancestors_worker(Relation inhRel, Oid relid,
*
* Returns inheritance parent of a partition by scanning pg_inherits
*
+ * If the partition is in the process of being detached, an error is thrown,
+ * unless even_if_detached is passed as true.
+ *
* Note: Because this function assumes that the relation whose OID is passed
* as an argument will have precisely one parent, it should only be called
* when it is known that the relation is a partition.
*/
Oid
-get_partition_parent(Oid relid)
+get_partition_parent(Oid relid, bool even_if_detached)
{
Relation catalogRelation;
Oid result;
+ bool detach_pending;
catalogRelation = table_open(InheritsRelationId, AccessShareLock);
- result = get_partition_parent_worker(catalogRelation, relid);
+ result = get_partition_parent_worker(catalogRelation, relid,
+ &detach_pending);
if (!OidIsValid(result))
elog(ERROR, "could not find tuple for parent of relation %u", relid);
+ if (detach_pending && !even_if_detached)
+ elog(ERROR, "relation %u has no parent because it's being detached",
+ relid);
+
table_close(catalogRelation, AccessShareLock);
return result;
@@ -68,15 +78,20 @@ get_partition_parent(Oid relid)
* get_partition_parent_worker
* Scan the pg_inherits relation to return the OID of the parent of the
* given relation
+ *
+ * If the partition is being detached, *detach_pending is set true (but the
+ * original parent is still returned.)
*/
static Oid
-get_partition_parent_worker(Relation inhRel, Oid relid)
+get_partition_parent_worker(Relation inhRel, Oid relid, bool *detach_pending)
{
SysScanDesc scan;
ScanKeyData key[2];
Oid result = InvalidOid;
HeapTuple tuple;
+ *detach_pending = false;
+
ScanKeyInit(&key[0],
Anum_pg_inherits_inhrelid,
BTEqualStrategyNumber, F_OIDEQ,
@@ -93,6 +108,9 @@ get_partition_parent_worker(Relation inhRel, Oid relid)
{
Form_pg_inherits form = (Form_pg_inherits) GETSTRUCT(tuple);
+ /* Let caller know of partition being detached */
+ if (form->inhdetachpending)
+ *detach_pending = true;
result = form->inhparent;
}
@@ -134,10 +152,14 @@ static void
get_partition_ancestors_worker(Relation inhRel, Oid relid, List **ancestors)
{
Oid parentOid;
+ bool detach_pending;
- /* Recursion ends at the topmost level, ie., when there's no parent */
- parentOid = get_partition_parent_worker(inhRel, relid);
- if (parentOid == InvalidOid)
+ /*
+ * Recursion ends at the topmost level, ie., when there's no parent; also
+ * when the partition is being detached.
+ */
+ parentOid = get_partition_parent_worker(inhRel, relid, &detach_pending);
+ if (parentOid == InvalidOid || detach_pending)
return;
*ancestors = lappend_oid(*ancestors, parentOid);
@@ -170,7 +192,7 @@ index_get_partition(Relation partition, Oid indexId)
ReleaseSysCache(tup);
if (!ispartition)
continue;
- if (get_partition_parent(partIdx) == indexId)
+ if (get_partition_parent(partIdx, false) == indexId)
{
list_free(idxlist);
return partIdx;
diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c
index 5ab79028274..bedee069be2 100644
--- a/src/backend/catalog/pg_inherits.c
+++ b/src/backend/catalog/pg_inherits.c
@@ -29,6 +29,7 @@
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/memutils.h"
+#include "utils/snapmgr.h"
#include "utils/syscache.h"
/*
@@ -50,9 +51,14 @@ typedef struct SeenRelsEntry
* given rel; caller should already have locked it). If lockmode is NoLock
* then no locks are acquired, but caller must beware of race conditions
* against possible DROPs of child relations.
+ *
+ * include_detached says to include all partitions, even if they're marked
+ * detached. Passing it as false means they might or might not be included,
+ * depending on the visibility of the pg_inherits row for the active snapshot.
*/
List *
-find_inheritance_children(Oid parentrelId, LOCKMODE lockmode)
+find_inheritance_children(Oid parentrelId, bool include_detached,
+ LOCKMODE lockmode)
{
List *list = NIL;
Relation relation;
@@ -91,6 +97,30 @@ find_inheritance_children(Oid parentrelId, LOCKMODE lockmode)
while ((inheritsTuple = systable_getnext(scan)) != NULL)
{
+ /*
+ * Cope with partitions concurrently being detached. When we see a
+ * partition marked "detach pending", we only include it in the set of
+ * visible partitions if caller requested all detached partitions, or
+ * if its pg_inherits tuple's xmin is still visible to the active
+ * snapshot.
+ *
+ * The reason for this check is that we want to avoid seeing the
+ * partition as alive in RI queries during REPEATABLE READ or
+ * SERIALIZABLE transactions.
+ */
+ if (((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhdetachpending &&
+ !include_detached)
+ {
+ TransactionId xmin;
+ Snapshot snap;
+
+ xmin = HeapTupleHeaderGetXmin(inheritsTuple->t_data);
+ snap = GetActiveSnapshot();
+
+ if (!XidInMVCCSnapshot(xmin, snap))
+ continue;
+ }
+
inhrelid = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhrelid;
if (numoids >= maxoids)
{
@@ -160,6 +190,9 @@ find_inheritance_children(Oid parentrelId, LOCKMODE lockmode)
* given rel; caller should already have locked it). If lockmode is NoLock
* then no locks are acquired, but caller must beware of race conditions
* against possible DROPs of child relations.
+ *
+ * NB - No current callers of this routine are interested in children being
+ * concurrently detached, so there's no provision to include them.
*/
List *
find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents)
@@ -199,7 +232,8 @@ find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents)
ListCell *lc;
/* Get the direct children of this rel */
- currentchildren = find_inheritance_children(currentrel, lockmode);
+ currentchildren = find_inheritance_children(currentrel, false,
+ lockmode);
/*
* Add to the queue only those children not already seen. This avoids
@@ -430,6 +464,7 @@ StoreSingleInheritance(Oid relationId, Oid parentOid, int32 seqNumber)
values[Anum_pg_inherits_inhrelid - 1] = ObjectIdGetDatum(relationId);
values[Anum_pg_inherits_inhparent - 1] = ObjectIdGetDatum(parentOid);
values[Anum_pg_inherits_inhseqno - 1] = Int32GetDatum(seqNumber);
+ values[Anum_pg_inherits_inhdetachpending - 1] = BoolGetDatum(false);
memset(nulls, 0, sizeof(nulls));
@@ -449,10 +484,17 @@ StoreSingleInheritance(Oid relationId, Oid parentOid, int32 seqNumber)
* as InvalidOid, in which case all tuples matching inhrelid are deleted;
* otherwise only delete tuples with the specified inhparent.
*
+ * expect_detach_pending is the expected state of the inhdetachpending flag.
+ * If the catalog row does not match that state, an error is raised.
+ *
+ * childname is the partition name, if a table; pass NULL for regular
+ * inheritance or when working with other relation kinds.
+ *
* Returns whether at least one row was deleted.
*/
bool
-DeleteInheritsTuple(Oid inhrelid, Oid inhparent)
+DeleteInheritsTuple(Oid inhrelid, Oid inhparent, bool expect_detach_pending,
+ const char *childname)
{
bool found = false;
Relation catalogRelation;
@@ -479,6 +521,29 @@ DeleteInheritsTuple(Oid inhrelid, Oid inhparent)
parent = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhparent;
if (!OidIsValid(inhparent) || parent == inhparent)
{
+ bool detach_pending;
+
+ detach_pending =
+ ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhdetachpending;
+
+ /*
+ * Raise error depending on state. This should only happen for
+ * partitions, but we have no way to cross-check.
+ */
+ if (detach_pending && !expect_detach_pending)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot detach partition \"%s\"",
+ childname ? childname : "unknown relation"),
+ errdetail("The partition is being detached concurrently or has an unfinished detach."),
+ errhint("Use ALTER TABLE ... DETACH PARTITION ... FINALIZE to complete the pending detach operation")));
+ if (!detach_pending && expect_detach_pending)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot complete detaching partition \"%s\"",
+ childname ? childname : "unknown relation"),
+ errdetail("There's no pending concurrent detach.")));
+
CatalogTupleDelete(catalogRelation, &inheritsTuple->t_self);
found = true;
}
@@ -490,3 +555,46 @@ DeleteInheritsTuple(Oid inhrelid, Oid inhparent)
return found;
}
+
+/*
+ * Return whether the pg_inherits tuple for a partition has the "detach
+ * pending" flag set.
+ */
+bool
+PartitionHasPendingDetach(Oid partoid)
+{
+ Relation catalogRelation;
+ ScanKeyData key;
+ SysScanDesc scan;
+ HeapTuple inheritsTuple;
+
+ /* We don't have a good way to verify it is in fact a partition */
+
+ /*
+ * Find the pg_inherits entry by inhrelid. (There should only be one.)
+ */
+ catalogRelation = table_open(InheritsRelationId, RowExclusiveLock);
+ ScanKeyInit(&key,
+ Anum_pg_inherits_inhrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(partoid));
+ scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId,
+ true, NULL, 1, &key);
+
+ while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan)))
+ {
+ bool detached;
+
+ detached =
+ ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhdetachpending;
+
+ /* Done */
+ systable_endscan(scan);
+ table_close(catalogRelation, RowExclusiveLock);
+
+ return detached;
+ }
+
+ elog(ERROR, "relation %u is not a partition", partoid);
+ return false; /* keep compiler quiet */
+}
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index e4e1bbb7e00..166374cc0c9 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -410,7 +410,7 @@ CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts)
* GetCurrentVirtualXIDs. If, during any iteration, a particular vxid
* doesn't show up in the output, we know we can forget about it.
*/
-static void
+void
WaitForOlderSnapshots(TransactionId limitXmin, bool progress)
{
int n_old_snapshots;
@@ -1123,7 +1123,7 @@ DefineIndex(Oid relationId,
*/
if (partitioned && stmt->relation && !stmt->relation->inh)
{
- PartitionDesc pd = RelationGetPartitionDesc(rel);
+ PartitionDesc pd = RelationGetPartitionDesc(rel, false);
if (pd->nparts != 0)
flags |= INDEX_CREATE_INVALID;
@@ -1180,7 +1180,7 @@ DefineIndex(Oid relationId,
*
* If we're called internally (no stmt->relation), recurse always.
*/
- partdesc = RelationGetPartitionDesc(rel);
+ partdesc = RelationGetPartitionDesc(rel, false);
if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
{
int nparts = partdesc->nparts;
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 550b84681e4..efac06f72c7 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -552,7 +552,8 @@ static PartitionSpec *transformPartitionSpec(Relation rel, PartitionSpec *partsp
static void ComputePartitionAttrs(ParseState *pstate, Relation rel, List *partParams, AttrNumber *partattrs,
List **partexprs, Oid *partopclass, Oid *partcollation, char strategy);
static void CreateInheritance(Relation child_rel, Relation parent_rel);
-static void RemoveInheritance(Relation child_rel, Relation parent_rel);
+static void RemoveInheritance(Relation child_rel, Relation parent_rel,
+ bool allow_detached);
static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel,
PartitionCmd *cmd,
AlterTableUtilityContext *context);
@@ -561,8 +562,14 @@ static void QueuePartitionConstraintValidation(List **wqueue, Relation scanrel,
List *partConstraint,
bool validate_default);
static void CloneRowTriggersToPartition(Relation parent, Relation partition);
+static void DetachAddConstraintIfNeeded(List **wqueue, Relation partRel);
static void DropClonedTriggersFromPartition(Oid partitionId);
-static ObjectAddress ATExecDetachPartition(Relation rel, RangeVar *name);
+static ObjectAddress ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab,
+ Relation rel, RangeVar *name,
+ bool concurrent);
+static void DetachPartitionFinalize(Relation rel, Relation partRel,
+ bool concurrent, Oid defaultPartOid);
+static ObjectAddress ATExecDetachPartitionFinalize(Relation rel, RangeVar *name);
static ObjectAddress ATExecAttachPartitionIdx(List **wqueue, Relation rel,
RangeVar *name);
static void validatePartitionedIndex(Relation partedIdx, Relation partedTbl);
@@ -1010,7 +1017,8 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
* lock the partition so as to avoid a deadlock.
*/
defaultPartOid =
- get_default_oid_from_partdesc(RelationGetPartitionDesc(parent));
+ get_default_oid_from_partdesc(RelationGetPartitionDesc(parent,
+ false));
if (OidIsValid(defaultPartOid))
defaultRel = table_open(defaultPartOid, AccessExclusiveLock);
@@ -1563,7 +1571,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
*/
if (is_partition && relOid != oldRelOid)
{
- state->partParentOid = get_partition_parent(relOid);
+ state->partParentOid = get_partition_parent(relOid, true);
if (OidIsValid(state->partParentOid))
LockRelationOid(state->partParentOid, AccessExclusiveLock);
}
@@ -3323,7 +3331,7 @@ renameatt_internal(Oid myrelid,
* expected_parents will only be 0 if we are not already recursing.
*/
if (expected_parents == 0 &&
- find_inheritance_children(myrelid, NoLock) != NIL)
+ find_inheritance_children(myrelid, false, NoLock) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("inherited column \"%s\" must be renamed in child tables too",
@@ -3522,7 +3530,7 @@ rename_constraint_internal(Oid myrelid,
else
{
if (expected_parents == 0 &&
- find_inheritance_children(myrelid, NoLock) != NIL)
+ find_inheritance_children(myrelid, false, NoLock) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("inherited constraint \"%s\" must be renamed in child tables too",
@@ -4142,7 +4150,14 @@ AlterTableGetLockLevel(List *cmds)
break;
case AT_DetachPartition:
- cmd_lockmode = AccessExclusiveLock;
+ if (((PartitionCmd *) cmd->def)->concurrent)
+ cmd_lockmode = ShareUpdateExclusiveLock;
+ else
+ cmd_lockmode = AccessExclusiveLock;
+ break;
+
+ case AT_DetachPartitionFinalize:
+ cmd_lockmode = ShareUpdateExclusiveLock;
break;
case AT_CheckNotNull:
@@ -4227,6 +4242,19 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
tab = ATGetQueueEntry(wqueue, rel);
/*
+ * Disallow any ALTER TABLE other than ALTER TABLE DETACH FINALIZE on
+ * partitions that are pending detach.
+ */
+ if (rel->rd_rel->relispartition &&
+ cmd->subtype != AT_DetachPartitionFinalize &&
+ PartitionHasPendingDetach(RelationGetRelid(rel)))
+ ereport(ERROR,
+ errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot alter partition \"%s\" with an incomplete detach",
+ RelationGetRelationName(rel)),
+ errhint("Use ALTER TABLE ... DETACH PARTITION ... FINALIZE to complete the pending detach operation."));
+
+ /*
* Copy the original subcommand for each table. This avoids conflicts
* when different child tables need to make different parse
* transformations (for example, the same column may have different column
@@ -4539,6 +4567,11 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
/* No command-specific prep needed */
pass = AT_PASS_MISC;
break;
+ case AT_DetachPartitionFinalize:
+ ATSimplePermissions(rel, ATT_TABLE);
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
default: /* oops */
elog(ERROR, "unrecognized alter table type: %d",
(int) cmd->subtype);
@@ -4930,7 +4963,12 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab,
Assert(cmd != NULL);
/* ATPrepCmd ensures it must be a table */
Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
- ATExecDetachPartition(rel, ((PartitionCmd *) cmd->def)->name);
+ ATExecDetachPartition(wqueue, tab, rel,
+ ((PartitionCmd *) cmd->def)->name,
+ ((PartitionCmd *) cmd->def)->concurrent);
+ break;
+ case AT_DetachPartitionFinalize:
+ ATExecDetachPartitionFinalize(rel, ((PartitionCmd *) cmd->def)->name);
break;
case AT_AlterCollationRefreshVersion:
/* ATPrepCmd ensured it must be an index */
@@ -6362,7 +6400,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
*/
if (colDef->identity &&
recurse &&
- find_inheritance_children(myrelid, NoLock) != NIL)
+ find_inheritance_children(myrelid, false, NoLock) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("cannot recursively add identity column to table that has child tables")));
@@ -6607,7 +6645,8 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
* routines, we have to do this one level of recursion at a time; we can't
* use find_all_inheritors to do it in one pass.
*/
- children = find_inheritance_children(RelationGetRelid(rel), lockmode);
+ children =
+ find_inheritance_children(RelationGetRelid(rel), false, lockmode);
/*
* If we are told not to recurse, there had better not be any child
@@ -6761,7 +6800,7 @@ ATPrepDropNotNull(Relation rel, bool recurse, bool recursing)
*/
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
- PartitionDesc partdesc = RelationGetPartitionDesc(rel);
+ PartitionDesc partdesc = RelationGetPartitionDesc(rel, false);
Assert(partdesc != NULL);
if (partdesc->nparts > 0 && !recurse && !recursing)
@@ -6860,7 +6899,7 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
/* If rel is partition, shouldn't drop NOT NULL if parent has the same */
if (rel->rd_rel->relispartition)
{
- Oid parentId = get_partition_parent(RelationGetRelid(rel));
+ Oid parentId = get_partition_parent(RelationGetRelid(rel), false);
Relation parent = table_open(parentId, AccessShareLock);
TupleDesc tupDesc = RelationGetDescr(parent);
AttrNumber parent_attnum;
@@ -7470,7 +7509,7 @@ ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recurs
* resulting state can be properly dumped and restored.
*/
if (!recurse &&
- find_inheritance_children(RelationGetRelid(rel), lockmode))
+ find_inheritance_children(RelationGetRelid(rel), false, lockmode))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("ALTER TABLE / DROP EXPRESSION must be applied to child tables too")));
@@ -8077,7 +8116,8 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
* routines, we have to do this one level of recursion at a time; we can't
* use find_all_inheritors to do it in one pass.
*/
- children = find_inheritance_children(RelationGetRelid(rel), lockmode);
+ children =
+ find_inheritance_children(RelationGetRelid(rel), false, lockmode);
if (children)
{
@@ -8541,7 +8581,8 @@ ATAddCheckConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
* routines, we have to do this one level of recursion at a time; we can't
* use find_all_inheritors to do it in one pass.
*/
- children = find_inheritance_children(RelationGetRelid(rel), lockmode);
+ children =
+ find_inheritance_children(RelationGetRelid(rel), false, lockmode);
/*
* Check if ONLY was specified with ALTER TABLE. If so, allow the
@@ -9156,7 +9197,7 @@ addFkRecurseReferenced(List **wqueue, Constraint *fkconstraint, Relation rel,
*/
if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
- PartitionDesc pd = RelationGetPartitionDesc(pkrel);
+ PartitionDesc pd = RelationGetPartitionDesc(pkrel, false);
for (int i = 0; i < pd->nparts; i++)
{
@@ -9290,7 +9331,7 @@ addFkRecurseReferencing(List **wqueue, Constraint *fkconstraint, Relation rel,
}
else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
- PartitionDesc pd = RelationGetPartitionDesc(rel);
+ PartitionDesc pd = RelationGetPartitionDesc(rel, false);
/*
* Recurse to take appropriate action on each partition; either we
@@ -11074,7 +11115,8 @@ ATExecDropConstraint(Relation rel, const char *constrName,
* use find_all_inheritors to do it in one pass.
*/
if (!is_no_inherit_constraint)
- children = find_inheritance_children(RelationGetRelid(rel), lockmode);
+ children =
+ find_inheritance_children(RelationGetRelid(rel), false, lockmode);
else
children = NIL;
@@ -11458,7 +11500,8 @@ ATPrepAlterColumnType(List **wqueue,
}
}
else if (!recursing &&
- find_inheritance_children(RelationGetRelid(rel), NoLock) != NIL)
+ find_inheritance_children(RelationGetRelid(rel), false,
+ NoLock) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("type of inherited column \"%s\" must be changed in child tables too",
@@ -14296,7 +14339,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
*/
/* Off to RemoveInheritance() where most of the work happens */
- RemoveInheritance(rel, parent_rel);
+ RemoveInheritance(rel, parent_rel, false);
ObjectAddressSet(address, RelationRelationId,
RelationGetRelid(parent_rel));
@@ -14308,11 +14351,71 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
}
/*
+ * MarkInheritDetached
+ *
+ * Set inhdetachpending for a partition, for ATExecDetachPartition
+ * in concurrent mode.
+ */
+static void
+MarkInheritDetached(Relation child_rel, Relation parent_rel)
+{
+ Relation catalogRelation;
+ SysScanDesc scan;
+ ScanKeyData key;
+ HeapTuple inheritsTuple;
+ bool found = false;
+
+ Assert(child_rel->rd_rel->relkind == RELKIND_RELATION ||
+ child_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+ Assert(parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+ /*
+ * Find pg_inherits entries by inhrelid.
+ */
+ catalogRelation = table_open(InheritsRelationId, RowExclusiveLock);
+ ScanKeyInit(&key,
+ Anum_pg_inherits_inhrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(child_rel)));
+ scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId,
+ true, NULL, 1, &key);
+
+ while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan)))
+ {
+ HeapTuple newtup;
+
+ if (((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhparent !=
+ RelationGetRelid(parent_rel))
+ elog(ERROR, "bad parent tuple found for partition %u",
+ RelationGetRelid(child_rel));
+
+ newtup = heap_copytuple(inheritsTuple);
+ ((Form_pg_inherits) GETSTRUCT(newtup))->inhdetachpending = true;
+
+ CatalogTupleUpdate(catalogRelation,
+ &inheritsTuple->t_self,
+ newtup);
+ found = true;
+ }
+
+ /* Done */
+ systable_endscan(scan);
+ table_close(catalogRelation, RowExclusiveLock);
+
+ if (!found)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_TABLE),
+ errmsg("relation \"%s\" is not a partition of relation \"%s\"",
+ RelationGetRelationName(child_rel),
+ RelationGetRelationName(parent_rel))));
+}
+
+/*
* RemoveInheritance
*
* Drop a parent from the child's parents. This just adjusts the attinhcount
* and attislocal of the columns and removes the pg_inherit and pg_depend
- * entries.
+ * entries. expect_detached is passed down to DeleteInheritsTuple, q.v..
*
* If attinhcount goes to 0 then attislocal gets set to true. If it goes back
* up attislocal stays true, which means if a child is ever removed from a
@@ -14326,7 +14429,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
* Common to ATExecDropInherit() and ATExecDetachPartition().
*/
static void
-RemoveInheritance(Relation child_rel, Relation parent_rel)
+RemoveInheritance(Relation child_rel, Relation parent_rel, bool expect_detached)
{
Relation catalogRelation;
SysScanDesc scan;
@@ -14342,7 +14445,9 @@ RemoveInheritance(Relation child_rel, Relation parent_rel)
child_is_partition = true;
found = DeleteInheritsTuple(RelationGetRelid(child_rel),
- RelationGetRelid(parent_rel));
+ RelationGetRelid(parent_rel),
+ expect_detached,
+ RelationGetRelationName(child_rel));
if (!found)
{
if (child_is_partition)
@@ -16508,7 +16613,7 @@ QueuePartitionConstraintValidation(List **wqueue, Relation scanrel,
}
else if (scanrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
- PartitionDesc partdesc = RelationGetPartitionDesc(scanrel);
+ PartitionDesc partdesc = RelationGetPartitionDesc(scanrel, false);
int i;
for (i = 0; i < partdesc->nparts; i++)
@@ -16568,7 +16673,7 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd,
* new partition will change its partition constraint.
*/
defaultPartOid =
- get_default_oid_from_partdesc(RelationGetPartitionDesc(rel));
+ get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, false));
if (OidIsValid(defaultPartOid))
LockRelationOid(defaultPartOid, AccessExclusiveLock);
@@ -17157,105 +17262,213 @@ CloneRowTriggersToPartition(Relation parent, Relation partition)
* ALTER TABLE DETACH PARTITION
*
* Return the address of the relation that is no longer a partition of rel.
+ *
+ * If concurrent mode is requested, we run in two transactions. A side-
+ * effect is that this command cannot run in a multi-part ALTER TABLE.
+ * Currently, that's enforced by the grammar.
+ *
+ * The strategy for concurrency is to first modify the partition's
+ * pg_inherit catalog row to make it visible to everyone that the
+ * partition is detached, lock the partition against writes, and commit
+ * the transaction; anyone who requests the partition descriptor from
+ * that point onwards has to ignore such a partition. In a second
+ * transaction, we wait until all transactions that could have seen the
+ * partition as attached are gone, then we remove the rest of partition
+ * metadata (pg_inherits and pg_class.relpartbounds).
*/
static ObjectAddress
-ATExecDetachPartition(Relation rel, RangeVar *name)
+ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel,
+ RangeVar *name, bool concurrent)
{
- Relation partRel,
- classRel;
- HeapTuple tuple,
- newtuple;
- Datum new_val[Natts_pg_class];
- bool new_null[Natts_pg_class],
- new_repl[Natts_pg_class];
+ Relation partRel;
ObjectAddress address;
Oid defaultPartOid;
- List *indexes;
- List *fks;
- ListCell *cell;
/*
* We must lock the default partition, because detaching this partition
* will change its partition constraint.
*/
defaultPartOid =
- get_default_oid_from_partdesc(RelationGetPartitionDesc(rel));
+ get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, false));
if (OidIsValid(defaultPartOid))
+ {
+ /*
+ * Concurrent detaching when a default partition exists is not
+ * supported. The main problem is that the default partition
+ * constraint would change. And there's a definitional problem: what
+ * should happen to the tuples that are being inserted that belong to
+ * the partition being detached? Putting them on the partition being
+ * detached would be wrong, since they'd become "lost" after the but
+ * we cannot put them in the default partition either until we alter
+ * its partition constraint.
+ *
+ * I think we could solve this problem if we effected the constraint
+ * change before committing the first transaction. But the lock would
+ * have to remain AEL and it would cause concurrent query planning to
+ * be blocked, so changing it that way would be even worse.
+ */
+ if (concurrent)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot detach partitions concurrently when a default partition exists")));
LockRelationOid(defaultPartOid, AccessExclusiveLock);
+ }
- partRel = table_openrv(name, ShareUpdateExclusiveLock);
+ /*
+ * In concurrent mode, the partition is locked with share-update-exclusive
+ * in the first transaction. This allows concurrent transactions to be
+ * doing DML to the partition.
+ */
+ partRel = table_openrv(name, concurrent ? ShareUpdateExclusiveLock :
+ AccessExclusiveLock);
- /* Ensure that foreign keys still hold after this detach */
+ /*
+ * Check inheritance conditions and either delete the pg_inherits row
+ * (in non-concurrent mode) or just set the inhdetachpending flag.
+ */
+ if (!concurrent)
+ RemoveInheritance(partRel, rel, false);
+ else
+ MarkInheritDetached(partRel, rel);
+
+ /*
+ * Ensure that foreign keys still hold after this detach. This keeps
+ * locks on the referencing tables, which prevents concurrent transactions
+ * from adding rows that we wouldn't see. For this to work in concurrent
+ * mode, it is critical that the partition appears as no longer attached
+ * for the RI queries as soon as the first transaction commits.
+ */
ATDetachCheckNoForeignKeyRefs(partRel);
- /* All inheritance related checks are performed within the function */
- RemoveInheritance(partRel, rel);
+ /*
+ * Concurrent mode has to work harder; first we add a new constraint to
+ * the partition that matches the partition constraint. Then we close our
+ * existing transaction, and in a new one wait for all processes to catch
+ * up on the catalog updates we've done so far; at that point we can
+ * complete the operation.
+ */
+ if (concurrent)
+ {
+ Oid partrelid,
+ parentrelid;
+ LOCKTAG tag;
+ char *parentrelname;
+ char *partrelname;
- /* Update pg_class tuple */
- classRel = table_open(RelationRelationId, RowExclusiveLock);
- tuple = SearchSysCacheCopy1(RELOID,
- ObjectIdGetDatum(RelationGetRelid(partRel)));
- if (!HeapTupleIsValid(tuple))
- elog(ERROR, "cache lookup failed for relation %u",
- RelationGetRelid(partRel));
- Assert(((Form_pg_class) GETSTRUCT(tuple))->relispartition);
+ /*
+ * Add a new constraint to the partition being detached, which
+ * supplants the partition constraint (unless there is one already).
+ */
+ DetachAddConstraintIfNeeded(wqueue, partRel);
- /* Clear relpartbound and reset relispartition */
- memset(new_val, 0, sizeof(new_val));
- memset(new_null, false, sizeof(new_null));
- memset(new_repl, false, sizeof(new_repl));
- new_val[Anum_pg_class_relpartbound - 1] = (Datum) 0;
- new_null[Anum_pg_class_relpartbound - 1] = true;
- new_repl[Anum_pg_class_relpartbound - 1] = true;
- newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel),
- new_val, new_null, new_repl);
+ /*
+ * We're almost done now; the only traces that remain are the
+ * pg_inherits tuple and the partition's relpartbounds. Before we can
+ * remove those, we need to wait until all transactions that know that
+ * this is a partition are gone.
+ */
- ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = false;
- CatalogTupleUpdate(classRel, &newtuple->t_self, newtuple);
- heap_freetuple(newtuple);
+ /*
+ * Remember relation OIDs to re-acquire them later; and relation names
+ * too, for error messages if something is dropped in between.
+ */
+ partrelid = RelationGetRelid(partRel);
+ parentrelid = RelationGetRelid(rel);
+ parentrelname = MemoryContextStrdup(PortalContext,
+ RelationGetRelationName(rel));
+ partrelname = MemoryContextStrdup(PortalContext,
+ RelationGetRelationName(partRel));
+
+ /* Invalidate relcache entries for the parent -- must be before close */
+ CacheInvalidateRelcache(rel);
+
+ table_close(partRel, NoLock);
+ table_close(rel, NoLock);
+ tab->rel = NULL;
+
+ /* Make updated catalog entry visible */
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+
+ StartTransactionCommand();
- if (OidIsValid(defaultPartOid))
- {
/*
- * If the relation being detached is the default partition itself,
- * remove it from the parent's pg_partitioned_table entry.
+ * Now wait. This ensures that all queries that were planned including
+ * the partition are finished before we remove the rest of catalog
+ * entries. We don't need or indeed want to acquire this lock, though
+ * -- that would block later queries.
*
- * If not, we must invalidate default partition's relcache entry, as
- * in StorePartitionBound: its partition constraint depends on every
- * other partition's partition constraint.
+ * We don't need to concern ourselves with waiting for a lock on the
+ * partition itself, since we will acquire AccessExclusiveLock below.
*/
- if (RelationGetRelid(partRel) == defaultPartOid)
- update_default_partition_oid(RelationGetRelid(rel), InvalidOid);
- else
- CacheInvalidateRelcacheByRelid(defaultPartOid);
+ SET_LOCKTAG_RELATION(tag, MyDatabaseId, parentrelid);
+ WaitForLockersMultiple(list_make1(&tag), AccessExclusiveLock, false);
+
+ /*
+ * Now acquire locks in both relations again. Note they may have been
+ * removed in the meantime, so care is required.
+ */
+ rel = try_relation_open(parentrelid, ShareUpdateExclusiveLock);
+ partRel = try_relation_open(partrelid, AccessExclusiveLock);
+
+ /* If the relations aren't there, something bad happened; bail out */
+ if (rel == NULL)
+ {
+ if (partRel != NULL) /* shouldn't happen */
+ elog(WARNING, "dangling partition \"%s\" remains, can't fix",
+ partrelname);
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("partitioned table \"%s\" was removed concurrently",
+ parentrelname)));
+ }
+ if (partRel == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("partition \"%s\" was removed concurrently", partrelname)));
+
+ tab->rel = rel;
}
- /* detach indexes too */
- indexes = RelationGetIndexList(partRel);
- foreach(cell, indexes)
- {
- Oid idxid = lfirst_oid(cell);
- Relation idx;
- Oid constrOid;
+ /* Do the final part of detaching */
+ DetachPartitionFinalize(rel, partRel, concurrent, defaultPartOid);
- if (!has_superclass(idxid))
- continue;
+ ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel));
- Assert((IndexGetRelation(get_partition_parent(idxid), false) ==
- RelationGetRelid(rel)));
+ /* keep our lock until commit */
+ table_close(partRel, NoLock);
- idx = index_open(idxid, AccessExclusiveLock);
- IndexSetParentIndex(idx, InvalidOid);
+ return address;
+}
- /* If there's a constraint associated with the index, detach it too */
- constrOid = get_relation_idx_constraint_oid(RelationGetRelid(partRel),
- idxid);
- if (OidIsValid(constrOid))
- ConstraintSetParentConstraint(constrOid, InvalidOid, InvalidOid);
+/*
+ * Second part of ALTER TABLE .. DETACH.
+ *
+ * This is separate so that it can be run independently when the second
+ * transaction of the concurrent algorithm fails (crash or abort).
+ */
+static void
+DetachPartitionFinalize(Relation rel, Relation partRel, bool concurrent,
+ Oid defaultPartOid)
+{
+ Relation classRel;
+ List *fks;
+ ListCell *cell;
+ List *indexes;
+ Datum new_val[Natts_pg_class];
+ bool new_null[Natts_pg_class],
+ new_repl[Natts_pg_class];
+ HeapTuple tuple,
+ newtuple;
- index_close(idx, NoLock);
+ if (concurrent)
+ {
+ /*
+ * We can remove the pg_inherits row now. (In the non-concurrent case,
+ * this was already done).
+ */
+ RemoveInheritance(partRel, rel, true);
}
- table_close(classRel, RowExclusiveLock);
/* Drop any triggers that were cloned on creation/attach. */
DropClonedTriggersFromPartition(RelationGetRelid(partRel));
@@ -17328,23 +17541,162 @@ ATExecDetachPartition(Relation rel, RangeVar *name)
ObjectAddressSet(constraint, ConstraintRelationId, constrOid);
performDeletion(&constraint, DROP_RESTRICT, 0);
}
- CommandCounterIncrement();
+
+ /* Now we can detach indexes */
+ indexes = RelationGetIndexList(partRel);
+ foreach(cell, indexes)
+ {
+ Oid idxid = lfirst_oid(cell);
+ Relation idx;
+ Oid constrOid;
+
+ if (!has_superclass(idxid))
+ continue;
+
+ Assert((IndexGetRelation(get_partition_parent(idxid, false), false) ==
+ RelationGetRelid(rel)));
+
+ idx = index_open(idxid, AccessExclusiveLock);
+ IndexSetParentIndex(idx, InvalidOid);
+
+ /* If there's a constraint associated with the index, detach it too */
+ constrOid = get_relation_idx_constraint_oid(RelationGetRelid(partRel),
+ idxid);
+ if (OidIsValid(constrOid))
+ ConstraintSetParentConstraint(constrOid, InvalidOid, InvalidOid);
+
+ index_close(idx, NoLock);
+ }
+
+ /* Update pg_class tuple */
+ classRel = table_open(RelationRelationId, RowExclusiveLock);
+ tuple = SearchSysCacheCopy1(RELOID,
+ ObjectIdGetDatum(RelationGetRelid(partRel)));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u",
+ RelationGetRelid(partRel));
+ Assert(((Form_pg_class) GETSTRUCT(tuple))->relispartition);
+
+ /* Clear relpartbound and reset relispartition */
+ memset(new_val, 0, sizeof(new_val));
+ memset(new_null, false, sizeof(new_null));
+ memset(new_repl, false, sizeof(new_repl));
+ new_val[Anum_pg_class_relpartbound - 1] = (Datum) 0;
+ new_null[Anum_pg_class_relpartbound - 1] = true;
+ new_repl[Anum_pg_class_relpartbound - 1] = true;
+ newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel),
+ new_val, new_null, new_repl);
+
+ ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = false;
+ CatalogTupleUpdate(classRel, &newtuple->t_self, newtuple);
+ heap_freetuple(newtuple);
+ table_close(classRel, RowExclusiveLock);
+
+ if (OidIsValid(defaultPartOid))
+ {
+ /*
+ * If the relation being detached is the default partition itself,
+ * remove it from the parent's pg_partitioned_table entry.
+ *
+ * If not, we must invalidate default partition's relcache entry, as
+ * in StorePartitionBound: its partition constraint depends on every
+ * other partition's partition constraint.
+ */
+ if (RelationGetRelid(partRel) == defaultPartOid)
+ update_default_partition_oid(RelationGetRelid(rel), InvalidOid);
+ else
+ CacheInvalidateRelcacheByRelid(defaultPartOid);
+ }
/*
* Invalidate the parent's relcache so that the partition is no longer
* included in its partition descriptor.
*/
CacheInvalidateRelcache(rel);
+}
+
+/*
+ * ALTER TABLE ... DETACH PARTITION ... FINALIZE
+ *
+ * To use when a DETACH PARTITION command previously did not run to
+ * completion; this completes the detaching process.
+ */
+static ObjectAddress
+ATExecDetachPartitionFinalize(Relation rel, RangeVar *name)
+{
+ Relation partRel;
+ ObjectAddress address;
+ Snapshot snap = GetActiveSnapshot();
+
+ partRel = table_openrv(name, AccessExclusiveLock);
+
+ /*
+ * Wait until existing snapshots are gone. This is important if the
+ * second transaction of DETACH PARTITION CONCURRENTLY is canceled: the
+ * user could immediately run DETACH FINALIZE without actually waiting for
+ * existing transactions. We must not complete the detach action until
+ * all such queries are complete (otherwise we would present them with an
+ * inconsistent view of catalogs).
+ */
+ WaitForOlderSnapshots(snap->xmin, false);
+
+ DetachPartitionFinalize(rel, partRel, true, InvalidOid);
ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel));
- /* keep our lock until commit */
table_close(partRel, NoLock);
return address;
}
/*
+ * DetachAddConstraintIfNeeded
+ * Subroutine for ATExecDetachPartition. Create a constraint that
+ * takes the place of the partition constraint, but avoid creating
+ * a dupe if an equivalent constraint already exists.
+ */
+static void
+DetachAddConstraintIfNeeded(List **wqueue, Relation partRel)
+{
+ AlteredTableInfo *tab;
+ Expr *constraintExpr;
+ TupleDesc td = RelationGetDescr(partRel);
+ Constraint *n;
+
+ constraintExpr = make_ands_explicit(RelationGetPartitionQual(partRel));
+
+ /* If an identical constraint exists, we don't need to create one */
+ if (td->constr && td->constr->num_check > 0)
+ {
+ for (int i = 0; i < td->constr->num_check; i++)
+ {
+ Node *thisconstr;
+
+ thisconstr = stringToNode(td->constr->check[i].ccbin);
+
+ if (equal(constraintExpr, thisconstr))
+ return;
+ }
+ }
+
+ tab = ATGetQueueEntry(wqueue, partRel);
+
+ /* Add constraint on partition, equivalent to the partition constraint */
+ n = makeNode(Constraint);
+ n->contype = CONSTR_CHECK;
+ n->conname = NULL;
+ n->location = -1;
+ n->is_no_inherit = false;
+ n->raw_expr = NULL;
+ n->cooked_expr = nodeToString(constraintExpr);
+ n->initially_valid = true;
+ n->skip_validation = true;
+ /* It's a re-add, since it nominally already exists */
+ ATAddCheckConstraint(wqueue, tab, partRel, n,
+ true, false, true, ShareUpdateExclusiveLock);
+}
+
+/*
* DropClonedTriggersFromPartition
* subroutine for ATExecDetachPartition to remove any triggers that were
* cloned to the partition when it was created-as-partition or attached.
@@ -17511,7 +17863,7 @@ ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name)
/* Silently do nothing if already in the right state */
currParent = partIdx->rd_rel->relispartition ?
- get_partition_parent(partIdxId) : InvalidOid;
+ get_partition_parent(partIdxId, false) : InvalidOid;
if (currParent != RelationGetRelid(parentIdx))
{
IndexInfo *childInfo;
@@ -17539,7 +17891,7 @@ ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name)
RelationGetRelationName(partIdx))));
/* Make sure it indexes a partition of the other index's table */
- partDesc = RelationGetPartitionDesc(parentTbl);
+ partDesc = RelationGetPartitionDesc(parentTbl, false);
found = false;
for (i = 0; i < partDesc->nparts; i++)
{
@@ -17693,7 +18045,7 @@ validatePartitionedIndex(Relation partedIdx, Relation partedTbl)
* If we found as many inherited indexes as the partitioned table has
* partitions, we're good; update pg_index to set indisvalid.
*/
- if (tuples == RelationGetPartitionDesc(partedTbl)->nparts)
+ if (tuples == RelationGetPartitionDesc(partedTbl, false)->nparts)
{
Relation idxRel;
HeapTuple newtup;
@@ -17723,8 +18075,8 @@ validatePartitionedIndex(Relation partedIdx, Relation partedTbl)
/* make sure we see the validation we just did */
CommandCounterIncrement();
- parentIdxId = get_partition_parent(RelationGetRelid(partedIdx));
- parentTblId = get_partition_parent(RelationGetRelid(partedTbl));
+ parentIdxId = get_partition_parent(RelationGetRelid(partedIdx), false);
+ parentTblId = get_partition_parent(RelationGetRelid(partedTbl), false);
parentIdx = relation_open(parentIdxId, AccessExclusiveLock);
parentTbl = relation_open(parentTblId, AccessExclusiveLock);
Assert(!parentIdx->rd_index->indisvalid);
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 4e4e05844c5..7383d5994eb 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -1119,7 +1119,7 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
*/
if (partition_recurse)
{
- PartitionDesc partdesc = RelationGetPartitionDesc(rel);
+ PartitionDesc partdesc = RelationGetPartitionDesc(rel, false);
List *idxs = NIL;
List *childTbls = NIL;
ListCell *l;
@@ -1141,7 +1141,8 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
ListCell *l;
List *idxs = NIL;
- idxs = find_inheritance_children(indexOid, ShareRowExclusiveLock);
+ idxs = find_inheritance_children(indexOid, false,
+ ShareRowExclusiveLock);
foreach(l, idxs)
childTbls = lappend_oid(childTbls,
IndexGetRelation(lfirst_oid(l),
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index b8da4c5967d..619aaffae43 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -569,6 +569,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
int partidx)
{
ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
+ Oid partOid = dispatch->partdesc->oids[partidx];
Relation partrel;
int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
@@ -579,7 +580,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
oldcxt = MemoryContextSwitchTo(proute->memcxt);
- partrel = table_open(dispatch->partdesc->oids[partidx], RowExclusiveLock);
+ partrel = table_open(partOid, RowExclusiveLock);
leaf_part_rri = makeNode(ResultRelInfo);
InitResultRelInfo(leaf_part_rri,
@@ -1065,9 +1066,21 @@ ExecInitPartitionDispatchInfo(EState *estate,
int dispatchidx;
MemoryContext oldcxt;
+ /*
+ * For data modification, it is better that executor does not include
+ * partitions being detached, except in snapshot-isolation mode. This
+ * means that a read-committed transaction immediately gets a "no
+ * partition for tuple" error when a tuple is inserted into a partition
+ * that's being detached concurrently, but a transaction in repeatable-
+ * read mode can still use the partition. Note that because partition
+ * detach uses ShareLock on the partition (which conflicts with DML),
+ * we're certain that the detach won't be able to complete until any
+ * inserting transaction is done.
+ */
if (estate->es_partition_directory == NULL)
estate->es_partition_directory =
- CreatePartitionDirectory(estate->es_query_cxt);
+ CreatePartitionDirectory(estate->es_query_cxt,
+ IsolationUsesXactSnapshot());
oldcxt = MemoryContextSwitchTo(proute->memcxt);
@@ -1645,9 +1658,10 @@ ExecCreatePartitionPruneState(PlanState *planstate,
ListCell *lc;
int i;
+ /* Executor must always include detached partitions */
if (estate->es_partition_directory == NULL)
estate->es_partition_directory =
- CreatePartitionDirectory(estate->es_query_cxt);
+ CreatePartitionDirectory(estate->es_query_cxt, true);
n_part_hierarchies = list_length(partitionpruneinfo->prune_infos);
Assert(n_part_hierarchies > 0);
@@ -1713,9 +1727,12 @@ ExecCreatePartitionPruneState(PlanState *planstate,
partrel);
/*
- * Initialize the subplan_map and subpart_map. Since detaching a
- * partition requires AccessExclusiveLock, no partitions can have
- * disappeared, nor can the bounds for any partition have changed.
+ * Initialize the subplan_map and subpart_map.
+ *
+ * Because we request detached partitions to be included, and
+ * detaching waits for old transactions, it is safe to assume that
+ * no partitions have disappeared since this query was planned.
+ *
* However, new partitions may have been added.
*/
Assert(partdesc->nparts >= pinfo->nparts);
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 82d7cce5d57..38b56231b7d 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -4737,6 +4737,7 @@ _copyPartitionCmd(const PartitionCmd *from)
COPY_NODE_FIELD(name);
COPY_NODE_FIELD(bound);
+ COPY_SCALAR_FIELD(concurrent);
return newnode;
}
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 3e980c457c5..3292dda3424 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2975,6 +2975,7 @@ _equalPartitionCmd(const PartitionCmd *a, const PartitionCmd *b)
{
COMPARE_NODE_FIELD(name);
COMPARE_NODE_FIELD(bound);
+ COMPARE_SCALAR_FIELD(concurrent);
return true;
}
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 7f2e40ae39e..6c39bf893f8 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -2141,10 +2141,14 @@ set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
{
PartitionDesc partdesc;
- /* Create the PartitionDirectory infrastructure if we didn't already */
+ /*
+ * Create the PartitionDirectory infrastructure if we didn't already.
+ */
if (root->glob->partition_directory == NULL)
+ {
root->glob->partition_directory =
- CreatePartitionDirectory(CurrentMemoryContext);
+ CreatePartitionDirectory(CurrentMemoryContext, false);
+ }
partdesc = PartitionDirectoryLookup(root->glob->partition_directory,
relation);
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 4843ff99e2d..2132cf4d828 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -658,7 +658,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN EXPRESSION
EXTENSION EXTERNAL EXTRACT
- FALSE_P FAMILY FETCH FILTER FIRST_P FLOAT_P FOLLOWING FOR
+ FALSE_P FAMILY FETCH FILTER FINALIZE FIRST_P FLOAT_P FOLLOWING FOR
FORCE FOREIGN FORWARD FREEZE FROM FULL FUNCTION FUNCTIONS
GENERATED GLOBAL GRANT GRANTED GREATEST GROUP_P GROUPING GROUPS
@@ -2108,12 +2108,13 @@ partition_cmd:
n->subtype = AT_AttachPartition;
cmd->name = $3;
cmd->bound = $4;
+ cmd->concurrent = false;
n->def = (Node *) cmd;
$$ = (Node *) n;
}
- /* ALTER TABLE <name> DETACH PARTITION <partition_name> */
- | DETACH PARTITION qualified_name
+ /* ALTER TABLE <name> DETACH PARTITION <partition_name> [CONCURRENTLY] */
+ | DETACH PARTITION qualified_name opt_concurrently
{
AlterTableCmd *n = makeNode(AlterTableCmd);
PartitionCmd *cmd = makeNode(PartitionCmd);
@@ -2121,10 +2122,23 @@ partition_cmd:
n->subtype = AT_DetachPartition;
cmd->name = $3;
cmd->bound = NULL;
+ cmd->concurrent = $4;
n->def = (Node *) cmd;
$$ = (Node *) n;
}
+ | DETACH PARTITION qualified_name FINALIZE
+ {
+ AlterTableCmd *n = makeNode(AlterTableCmd);
+ PartitionCmd *cmd = makeNode(PartitionCmd);
+
+ n->subtype = AT_DetachPartitionFinalize;
+ cmd->name = $3;
+ cmd->bound = NULL;
+ cmd->concurrent = false;
+ n->def = (Node *) cmd;
+ $$ = (Node *) n;
+ }
;
index_partition_cmd:
@@ -2137,6 +2151,7 @@ index_partition_cmd:
n->subtype = AT_AttachPartition;
cmd->name = $3;
cmd->bound = NULL;
+ cmd->concurrent = false;
n->def = (Node *) cmd;
$$ = (Node *) n;
@@ -15395,6 +15410,7 @@ unreserved_keyword:
| EXTERNAL
| FAMILY
| FILTER
+ | FINALIZE
| FIRST_P
| FOLLOWING
| FORCE
@@ -15936,6 +15952,7 @@ bare_label_keyword:
| EXTRACT
| FALSE_P
| FAMILY
+ | FINALIZE
| FIRST_P
| FLOAT_P
| FOLLOWING
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index e5f3482d52e..2b2b1dc1ad7 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -2798,7 +2798,7 @@ check_new_partition_bound(char *relname, Relation parent,
PartitionBoundSpec *spec, ParseState *pstate)
{
PartitionKey key = RelationGetPartitionKey(parent);
- PartitionDesc partdesc = RelationGetPartitionDesc(parent);
+ PartitionDesc partdesc = RelationGetPartitionDesc(parent, true);
PartitionBoundInfo boundinfo = partdesc->boundinfo;
int with = -1;
bool overlap = false;
@@ -3990,7 +3990,7 @@ get_qual_for_list(Relation parent, PartitionBoundSpec *spec)
{
int i;
int ndatums = 0;
- PartitionDesc pdesc = RelationGetPartitionDesc(parent);
+ PartitionDesc pdesc = RelationGetPartitionDesc(parent, true); /* XXX correct? */
PartitionBoundInfo boundinfo = pdesc->boundinfo;
if (boundinfo)
@@ -4190,7 +4190,7 @@ get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
if (spec->is_default)
{
List *or_expr_args = NIL;
- PartitionDesc pdesc = RelationGetPartitionDesc(parent);
+ PartitionDesc pdesc = RelationGetPartitionDesc(parent, true); /* XXX correct? */
Oid *inhoids = pdesc->oids;
int nparts = pdesc->nparts,
i;
diff --git a/src/backend/partitioning/partdesc.c b/src/backend/partitioning/partdesc.c
index f852b6e99de..58570fecfdc 100644
--- a/src/backend/partitioning/partdesc.c
+++ b/src/backend/partitioning/partdesc.c
@@ -37,6 +37,7 @@ typedef struct PartitionDirectoryData
{
MemoryContext pdir_mcxt;
HTAB *pdir_hash;
+ bool include_detached;
} PartitionDirectoryData;
typedef struct PartitionDirectoryEntry
@@ -46,7 +47,7 @@ typedef struct PartitionDirectoryEntry
PartitionDesc pd;
} PartitionDirectoryEntry;
-static void RelationBuildPartitionDesc(Relation rel);
+static void RelationBuildPartitionDesc(Relation rel, bool include_detached);
/*
@@ -61,13 +62,14 @@ static void RelationBuildPartitionDesc(Relation rel);
* that the data doesn't become stale.
*/
PartitionDesc
-RelationGetPartitionDesc(Relation rel)
+RelationGetPartitionDesc(Relation rel, bool include_detached)
{
if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
return NULL;
- if (unlikely(rel->rd_partdesc == NULL))
- RelationBuildPartitionDesc(rel);
+ if (unlikely(rel->rd_partdesc == NULL ||
+ rel->rd_partdesc->includes_detached != include_detached))
+ RelationBuildPartitionDesc(rel, include_detached);
return rel->rd_partdesc;
}
@@ -88,7 +90,7 @@ RelationGetPartitionDesc(Relation rel)
* permanently.
*/
static void
-RelationBuildPartitionDesc(Relation rel)
+RelationBuildPartitionDesc(Relation rel, bool include_detached)
{
PartitionDesc partdesc;
PartitionBoundInfo boundinfo = NULL;
@@ -110,7 +112,8 @@ RelationBuildPartitionDesc(Relation rel)
* concurrently, whatever this function returns will be accurate as of
* some well-defined point in time.
*/
- inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
+ inhoids = find_inheritance_children(RelationGetRelid(rel), include_detached,
+ NoLock);
nparts = list_length(inhoids);
/* Allocate working arrays for OIDs, leaf flags, and boundspecs. */
@@ -238,6 +241,7 @@ RelationBuildPartitionDesc(Relation rel)
partdesc->boundinfo = partition_bounds_copy(boundinfo, key);
partdesc->oids = (Oid *) palloc(nparts * sizeof(Oid));
partdesc->is_leaf = (bool *) palloc(nparts * sizeof(bool));
+ partdesc->includes_detached = include_detached;
/*
* Assign OIDs from the original array into mapped indexes of the
@@ -280,7 +284,7 @@ RelationBuildPartitionDesc(Relation rel)
* Create a new partition directory object.
*/
PartitionDirectory
-CreatePartitionDirectory(MemoryContext mcxt)
+CreatePartitionDirectory(MemoryContext mcxt, bool include_detached)
{
MemoryContext oldcontext = MemoryContextSwitchTo(mcxt);
PartitionDirectory pdir;
@@ -295,6 +299,7 @@ CreatePartitionDirectory(MemoryContext mcxt)
pdir->pdir_hash = hash_create("partition directory", 256, &ctl,
HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+ pdir->include_detached = include_detached;
MemoryContextSwitchTo(oldcontext);
return pdir;
@@ -327,7 +332,7 @@ PartitionDirectoryLookup(PartitionDirectory pdir, Relation rel)
*/
RelationIncrementReferenceCount(rel);
pde->rel = rel;
- pde->pd = RelationGetPartitionDesc(rel);
+ pde->pd = RelationGetPartitionDesc(rel, pdir->include_detached);
Assert(pde->pd != NULL);
}
return pde->pd;
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 05bb698cf45..729274b330f 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -1236,6 +1236,25 @@ ProcessUtilitySlow(ParseState *pstate,
AlterTableStmt *atstmt = (AlterTableStmt *) parsetree;
Oid relid;
LOCKMODE lockmode;
+ ListCell *cell;
+
+ /*
+ * Disallow ALTER TABLE .. DETACH CONCURRENTLY in a
+ * transaction block or function. (Perhaps it could be
+ * allowed in a procedure, but don't hold your breath.)
+ */
+ foreach(cell, atstmt->cmds)
+ {
+ AlterTableCmd *cmd = (AlterTableCmd *) lfirst(cell);
+
+ /* Disallow DETACH CONCURRENTLY in a transaction block */
+ if (cmd->subtype == AT_DetachPartition)
+ {
+ if (((PartitionCmd *) cmd->def)->concurrent)
+ PreventInTransactionBlock(isTopLevel,
+ "ALTER TABLE ... DETACH CONCURRENTLY");
+ }
+ }
/*
* Figure out lock mode, and acquire lock. This also does
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index 09a2ad28814..7c77c338cec 100644
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -392,11 +392,15 @@ RI_FKey_check(TriggerData *trigdata)
/*
* Now check that foreign key exists in PK table
+ *
+ * XXX detectNewRows must be true when a partitioned table is on the
+ * referenced side. The reason is that our snapshot must be fresh
+ * in order for the hack in find_inheritance_children() to work.
*/
ri_PerformCheck(riinfo, &qkey, qplan,
fk_rel, pk_rel,
NULL, newslot,
- false,
+ pk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE,
SPI_OK_SELECT);
if (SPI_finish() != SPI_OK_FINISH)
diff --git a/src/backend/utils/cache/partcache.c b/src/backend/utils/cache/partcache.c
index a6388d980ed..21e60f0c5e8 100644
--- a/src/backend/utils/cache/partcache.c
+++ b/src/backend/utils/cache/partcache.c
@@ -341,6 +341,7 @@ generate_partition_qual(Relation rel)
bool isnull;
List *my_qual = NIL,
*result = NIL;
+ Oid parentrelid;
Relation parent;
/* Guard against stack overflow due to overly deep partition tree */
@@ -350,9 +351,14 @@ generate_partition_qual(Relation rel)
if (rel->rd_partcheckvalid)
return copyObject(rel->rd_partcheck);
- /* Grab at least an AccessShareLock on the parent table */
- parent = relation_open(get_partition_parent(RelationGetRelid(rel)),
- AccessShareLock);
+ /*
+ * Grab at least an AccessShareLock on the parent table. Must do this
+ * even if the partition has been partially detached, because transactions
+ * concurrent with the detach might still be trying to use a partition
+ * descriptor that includes it.
+ */
+ parentrelid = get_partition_parent(RelationGetRelid(rel), true);
+ parent = relation_open(parentrelid, AccessShareLock);
/* Get pg_class.relpartbound */
tuple = SearchSysCache1(RELOID, RelationGetRelid(rel));