summaryrefslogtreecommitdiff
path: root/src/backend/commands
diff options
context:
space:
mode:
authorNoah Misch2020-08-15 17:15:53 +0000
committerNoah Misch2020-08-15 17:15:53 +0000
commit566372b3d6435639e4cc4476d79b8505a0297c87 (patch)
treecb23c4c35d85e463569d0759b275f484c240cc47 /src/backend/commands
parentd4d443b3bbbb3eb9cdc511564ef3c57fde7dd3ac (diff)
Prevent concurrent SimpleLruTruncate() for any given SLRU.
The SimpleLruTruncate() header comment states the new coding rule. To achieve this, add locktype "frozenid" and two LWLocks. This closes a rare opportunity for data loss, which manifested as "apparent wraparound" or "could not access status of transaction" errors. Data loss is more likely in pg_multixact, due to released branches' thin margin between multiStopLimit and multiWrapLimit. If a user's physical replication primary logged ": apparent wraparound" messages, the user should rebuild standbys of that primary regardless of symptoms. At less risk is a cluster having emitted "not accepting commands" errors or "must be vacuumed" warnings at some point. One can test a cluster for this data loss by running VACUUM FREEZE in every database. Back-patch to 9.5 (all supported versions). Discussion: https://postgr.es/m/[email protected]
Diffstat (limited to 'src/backend/commands')
-rw-r--r--src/backend/commands/async.c37
-rw-r--r--src/backend/commands/vacuum.c13
2 files changed, 40 insertions, 10 deletions
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index 71b7577afc0..4c1286eb988 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -244,19 +244,22 @@ typedef struct QueueBackendStatus
/*
* Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff)
*
- * The AsyncQueueControl structure is protected by the NotifyQueueLock.
+ * The AsyncQueueControl structure is protected by the NotifyQueueLock and
+ * NotifyQueueTailLock.
*
- * When holding the lock in SHARED mode, backends may only inspect their own
- * entries as well as the head and tail pointers. Consequently we can allow a
- * backend to update its own record while holding only SHARED lock (since no
- * other backend will inspect it).
+ * When holding NotifyQueueLock in SHARED mode, backends may only inspect
+ * their own entries as well as the head and tail pointers. Consequently we
+ * can allow a backend to update its own record while holding only SHARED lock
+ * (since no other backend will inspect it).
*
- * When holding the lock in EXCLUSIVE mode, backends can inspect the entries
- * of other backends and also change the head and tail pointers.
+ * When holding NotifyQueueLock in EXCLUSIVE mode, backends can inspect the
+ * entries of other backends and also change the head pointer. When holding
+ * both NotifyQueueLock and NotifyQueueTailLock in EXCLUSIVE mode, backends
+ * can change the tail pointer.
*
* NotifySLRULock is used as the control lock for the pg_notify SLRU buffers.
- * In order to avoid deadlocks, whenever we need both locks, we always first
- * get NotifyQueueLock and then NotifySLRULock.
+ * In order to avoid deadlocks, whenever we need multiple locks, we first get
+ * NotifyQueueTailLock, then NotifyQueueLock, and lastly NotifySLRULock.
*
* Each backend uses the backend[] array entry with index equal to its
* BackendId (which can range from 1 to MaxBackends). We rely on this to make
@@ -2177,6 +2180,10 @@ asyncQueueAdvanceTail(void)
int newtailpage;
int boundary;
+ /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
+ LWLockAcquire(NotifyQueueTailLock, LW_EXCLUSIVE);
+
+ /* Compute the new tail. */
LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
min = QUEUE_HEAD;
for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i))
@@ -2185,7 +2192,6 @@ asyncQueueAdvanceTail(void)
min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i));
}
oldtailpage = QUEUE_POS_PAGE(QUEUE_TAIL);
- QUEUE_TAIL = min;
LWLockRelease(NotifyQueueLock);
/*
@@ -2205,6 +2211,17 @@ asyncQueueAdvanceTail(void)
*/
SimpleLruTruncate(NotifyCtl, newtailpage);
}
+
+ /*
+ * Advertise the new tail. This changes asyncQueueIsFull()'s verdict for
+ * the segment immediately prior to the new tail, allowing fresh data into
+ * that segment.
+ */
+ LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
+ QUEUE_TAIL = min;
+ LWLockRelease(NotifyQueueLock);
+
+ LWLockRelease(NotifyQueueTailLock);
}
/*
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index aba13c31d1b..5189a5ad5e3 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1362,6 +1362,14 @@ vac_update_datfrozenxid(void)
bool dirty = false;
/*
+ * Restrict this task to one backend per database. This avoids race
+ * conditions that would move datfrozenxid or datminmxid backward. It
+ * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
+ * datfrozenxid passed to an earlier vac_truncate_clog() call.
+ */
+ LockDatabaseFrozenIds(ExclusiveLock);
+
+ /*
* Initialize the "min" calculation with
* GetOldestNonRemovableTransactionId(), which is a reasonable
* approximation to the minimum relfrozenxid for not-yet-committed
@@ -1551,6 +1559,9 @@ vac_truncate_clog(TransactionId frozenXID,
bool bogus = false;
bool frozenAlreadyWrapped = false;
+ /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
+ LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
+
/* init oldest datoids to sync with my frozenXID/minMulti values */
oldestxid_datoid = MyDatabaseId;
minmulti_datoid = MyDatabaseId;
@@ -1660,6 +1671,8 @@ vac_truncate_clog(TransactionId frozenXID,
*/
SetTransactionIdLimit(frozenXID, oldestxid_datoid);
SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
+
+ LWLockRelease(WrapLimitsVacuumLock);
}