From cc2d7101ea189e9c15082e5f8889dde17d13b74b Mon Sep 17 00:00:00 2001 From: Commitfest Bot Date: Thu, 23 Jan 2025 15:50:25 +0000 Subject: [PATCH 1/3] [PATCH]: ./update-unused-lp-10-no-installcheck-v1.patch Disable runningcheck for src/test/modules/injection_points/specs. Directory "injection_points" has specified NO_INSTALLCHECK since before commit c35f419d6efbdf1a050250d84b687e6705917711 added the specs, but that commit neglected to disable the corresponding meson runningcheck. The alternative would be to enable "make installcheck" for ISOLATION, but the GNU make build system lacks a concept of setting NO_INSTALLCHECK for REGRESS without also setting it for ISOLATION. Back-patch to v17, where that commit first appeared, to avoid surprises when back-patching additional specs. Reviewed by FIXME. Discussion: https://postgr.es/m/17821-dd8c334263399284@postgresql.org --- src/test/modules/injection_points/meson.build | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build index 989b4db226be..ebe79fe06a12 100644 --- a/src/test/modules/injection_points/meson.build +++ b/src/test/modules/injection_points/meson.build @@ -45,6 +45,7 @@ tests += { 'basic', 'inplace', ], + 'runningcheck': false, # align with GNU make build system }, 'tap': { 'env': { From cd78f34f7e74287b49314b88d843f214b0b54a5a Mon Sep 17 00:00:00 2001 From: Commitfest Bot Date: Thu, 23 Jan 2025 15:50:25 +0000 Subject: [PATCH 2/3] [PATCH]: ./update-unused-lp-40-xid2fxid-v1.patch Merge copies of converting an XID to a FullTransactionId. Assume twophase.c is the performance-sensitive caller, and preserve its choice of unlikely() branch hint. Add some retrospective rationale for that choice. Back-patch to v17, for the next commit to use it. Reviewed (in earlier versions) by Michael Paquier. Discussion: https://postgr.es/m/17821-dd8c334263399284@postgresql.org Discussion: https://postgr.es/m/20250116010051.f3.nmisch@google.com --- contrib/amcheck/verify_heapam.c | 4 +- src/backend/access/transam/twophase.c | 26 ++------ src/backend/access/transam/xlogreader.c | 18 +----- src/backend/utils/adt/xid8funcs.c | 79 ++++++++----------------- src/include/access/transam.h | 43 ++++++++++++++ 5 files changed, 77 insertions(+), 93 deletions(-) diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index 8a8e36dde7e4..827312306f60 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -1877,7 +1877,9 @@ check_tuple(HeapCheckContext *ctx, bool *xmin_commit_status_ok, /* * Convert a TransactionId into a FullTransactionId using our cached values of * the valid transaction ID range. It is the caller's responsibility to have - * already updated the cached values, if necessary. + * already updated the cached values, if necessary. This is akin to + * FullTransactionIdFromAllowableAt(), but it tolerates corruption in the form + * of an xid before epoch 0. */ static FullTransactionId FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx) diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index ab2f4a8a92fd..73a80559194e 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -929,32 +929,16 @@ TwoPhaseGetDummyProc(TransactionId xid, bool lock_held) /* * Compute the FullTransactionId for the given TransactionId. * - * The wrap logic is safe here because the span of active xids cannot exceed one - * epoch at any given time. + * This is safe if the xid has not yet reached COMMIT PREPARED or ROLLBACK + * PREPARED. After those commands, concurrent vac_truncate_clog() may make + * the xid cease to qualify as allowable. XXX Not all callers limit their + * calls accordingly. */ static inline FullTransactionId AdjustToFullTransactionId(TransactionId xid) { - FullTransactionId nextFullXid; - TransactionId nextXid; - uint32 epoch; - Assert(TransactionIdIsValid(xid)); - - LWLockAcquire(XidGenLock, LW_SHARED); - nextFullXid = TransamVariables->nextXid; - LWLockRelease(XidGenLock); - - nextXid = XidFromFullTransactionId(nextFullXid); - epoch = EpochFromFullTransactionId(nextFullXid); - if (unlikely(xid > nextXid)) - { - /* Wraparound occurred, must be from a prev epoch. */ - Assert(epoch > 0); - epoch--; - } - - return FullTransactionIdFromEpochAndXid(epoch, xid); + return FullTransactionIdFromAllowableAt(ReadNextFullTransactionId(), xid); } static inline int diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 3596af06172e..91b6a91767d6 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -2166,28 +2166,14 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) FullTransactionId XLogRecGetFullXid(XLogReaderState *record) { - TransactionId xid, - next_xid; - uint32 epoch; - /* * This function is only safe during replay, because it depends on the * replay state. See AdvanceNextFullTransactionIdPastXid() for more. */ Assert(AmStartupProcess() || !IsUnderPostmaster); - xid = XLogRecGetXid(record); - next_xid = XidFromFullTransactionId(TransamVariables->nextXid); - epoch = EpochFromFullTransactionId(TransamVariables->nextXid); - - /* - * If xid is numerically greater than next_xid, it has to be from the last - * epoch. - */ - if (unlikely(xid > next_xid)) - --epoch; - - return FullTransactionIdFromEpochAndXid(epoch, xid); + return FullTransactionIdFromAllowableAt(TransamVariables->nextXid, + XLogRecGetXid(record)); } #endif diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c index 4736755b2985..20b28b2528c8 100644 --- a/src/backend/utils/adt/xid8funcs.c +++ b/src/backend/utils/adt/xid8funcs.c @@ -97,15 +97,11 @@ static bool TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid) { TransactionId xid = XidFromFullTransactionId(fxid); - uint32 now_epoch; - TransactionId now_epoch_next_xid; FullTransactionId now_fullxid; - TransactionId oldest_xid; - FullTransactionId oldest_fxid; + TransactionId oldest_clog_xid; + FullTransactionId oldest_clog_fxid; now_fullxid = ReadNextFullTransactionId(); - now_epoch_next_xid = XidFromFullTransactionId(now_fullxid); - now_epoch = EpochFromFullTransactionId(now_fullxid); if (extracted_xid != NULL) *extracted_xid = xid; @@ -135,52 +131,19 @@ TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid) /* * If fxid is not older than TransamVariables->oldestClogXid, the relevant - * CLOG entry is guaranteed to still exist. Convert - * TransamVariables->oldestClogXid into a FullTransactionId to compare it - * with fxid. Determine the right epoch knowing that oldest_fxid - * shouldn't be more than 2^31 older than now_fullxid. - */ - oldest_xid = TransamVariables->oldestClogXid; - Assert(TransactionIdPrecedesOrEquals(oldest_xid, now_epoch_next_xid)); - if (oldest_xid <= now_epoch_next_xid) - { - oldest_fxid = FullTransactionIdFromEpochAndXid(now_epoch, oldest_xid); - } - else - { - Assert(now_epoch > 0); - oldest_fxid = FullTransactionIdFromEpochAndXid(now_epoch - 1, oldest_xid); - } - return !FullTransactionIdPrecedes(fxid, oldest_fxid); -} - -/* - * Convert a TransactionId obtained from a snapshot held by the caller to a - * FullTransactionId. Use next_fxid as a reference FullTransactionId, so that - * we can compute the high order bits. It must have been obtained by the - * caller with ReadNextFullTransactionId() after the snapshot was created. - */ -static FullTransactionId -widen_snapshot_xid(TransactionId xid, FullTransactionId next_fxid) -{ - TransactionId next_xid = XidFromFullTransactionId(next_fxid); - uint32 epoch = EpochFromFullTransactionId(next_fxid); - - /* Special transaction ID. */ - if (!TransactionIdIsNormal(xid)) - return FullTransactionIdFromEpochAndXid(0, xid); - - /* - * The 64 bit result must be <= next_fxid, since next_fxid hadn't been - * issued yet when the snapshot was created. Every TransactionId in the - * snapshot must therefore be from the same epoch as next_fxid, or the - * epoch before. We know this because next_fxid is never allow to get - * more than one epoch ahead of the TransactionIds in any snapshot. + * CLOG entry is guaranteed to still exist. + * + * TransamVariables->oldestXid governs allowable XIDs. Usually, + * oldestClogXid==oldestXid. It's also possible for oldestClogXid to + * follow oldestXid, in which case oldestXid might advance after our + * ReadNextFullTransactionId() call. If oldestXid has advanced, that + * advancement reinstated the usual oldestClogXid==oldestXid. Whether or + * not that happened, oldestClogXid is allowable relative to now_fullxid. */ - if (xid > next_xid) - epoch--; - - return FullTransactionIdFromEpochAndXid(epoch, xid); + oldest_clog_xid = TransamVariables->oldestClogXid; + oldest_clog_fxid = + FullTransactionIdFromAllowableAt(now_fullxid, oldest_clog_xid); + return !FullTransactionIdPrecedes(fxid, oldest_clog_fxid); } /* @@ -420,12 +383,18 @@ pg_current_snapshot(PG_FUNCTION_ARGS) nxip = cur->xcnt; snap = palloc(PG_SNAPSHOT_SIZE(nxip)); - /* fill */ - snap->xmin = widen_snapshot_xid(cur->xmin, next_fxid); - snap->xmax = widen_snapshot_xid(cur->xmax, next_fxid); + /* + * Fill. This is the current backend's active snapshot, so MyProc->xmin + * is <= all these XIDs. As long as that remains so, oldestXid can't + * advance past any of these XIDs. Hence, these XIDs remain allowable + * relative to next_fxid. + */ + snap->xmin = FullTransactionIdFromAllowableAt(next_fxid, cur->xmin); + snap->xmax = FullTransactionIdFromAllowableAt(next_fxid, cur->xmax); snap->nxip = nxip; for (i = 0; i < nxip; i++) - snap->xip[i] = widen_snapshot_xid(cur->xip[i], next_fxid); + snap->xip[i] = + FullTransactionIdFromAllowableAt(next_fxid, cur->xip[i]); /* * We want them guaranteed to be in ascending order. This also removes diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 0cab8653f1b6..7d82cd2eb562 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -370,6 +370,49 @@ FullTransactionIdNewer(FullTransactionId a, FullTransactionId b) return b; } +/* + * Compute FullTransactionId for the given TransactionId, assuming xid was + * between [oldestXid, nextXid] at the time when TransamVariables->nextXid was + * nextFullXid. When adding calls, evaluate what prevents xid from preceding + * oldestXid if SetTransactionIdLimit() runs between the collection of xid and + * the collection of nextFullXid. + */ +static inline FullTransactionId +FullTransactionIdFromAllowableAt(FullTransactionId nextFullXid, + TransactionId xid) +{ + uint32 epoch; + + /* Special transaction ID. */ + if (!TransactionIdIsNormal(xid)) + return FullTransactionIdFromEpochAndXid(0, xid); + + Assert(TransactionIdPrecedesOrEquals(xid, + XidFromFullTransactionId(nextFullXid))); + + /* + * The 64 bit result must be <= nextFullXid, since nextFullXid hadn't been + * issued yet when xid was in the past. The xid must therefore be from + * the epoch of nextFullXid or the epoch before. We know this because we + * must remove (by freezing) an XID before assigning the XID half an epoch + * ahead of it. + * + * The unlikely() branch hint is dubious. It's perfect for the first 2^32 + * XIDs of a cluster's life. Right at 2^32 XIDs, misprediction shoots to + * 100%, then improves until perfection returns 2^31 XIDs later. Since + * current callers pass relatively-recent XIDs, expect >90% prediction + * accuracy overall. This favors average latency over tail latency. + */ + epoch = EpochFromFullTransactionId(nextFullXid); + if (unlikely(xid > XidFromFullTransactionId(nextFullXid))) + { + Assert(epoch != 0); + epoch--; + } + + return FullTransactionIdFromEpochAndXid(epoch, xid); +} + #endif /* FRONTEND */ #endif /* TRANSAM_H */ From 3951bb09f8e9b38281d9a4def77d6a005e30fa6d Mon Sep 17 00:00:00 2001 From: Commitfest Bot Date: Thu, 23 Jan 2025 15:50:25 +0000 Subject: [PATCH 3/3] [PATCH]: ./update-unused-lp-50-fix-v1.patch At update of non-LP_NORMAL TID, fail instead of corrupting page header. The right mix of DDL and VACUUM could corrupt a page header such that PageIsVerified() durably fails, requiring a restore from backup. One of the test permutations shows a variant not yet fixed. This makes !TransactionIdIsValid(TM_FailureData.xmax) possible with TM_Deleted. Core and PGXN appear indifferent to that. Per bug #17821 from Alexander Lakhin. Back-patch to v13 (all supported versions). The test case is v17+, since it uses INJECTION_POINT. Reviewed by FIXME. Discussion: https://postgr.es/m/17821-dd8c334263399284@postgresql.org --- src/backend/access/heap/heapam.c | 49 ++++- src/backend/utils/cache/inval.c | 3 + src/include/access/tableam.h | 3 +- src/test/modules/injection_points/Makefile | 5 +- .../expected/syscache-update-pruned.out | 87 +++++++++ .../expected/syscache-update-pruned_1.out | 86 +++++++++ .../injection_points--1.0.sql | 8 + src/test/modules/injection_points/meson.build | 4 +- .../injection_points/regress_injection.c | 71 +++++++ .../specs/syscache-update-pruned.spec | 179 ++++++++++++++++++ 10 files changed, 490 insertions(+), 5 deletions(-) create mode 100644 src/test/modules/injection_points/expected/syscache-update-pruned.out create mode 100644 src/test/modules/injection_points/expected/syscache-update-pruned_1.out create mode 100644 src/test/modules/injection_points/regress_injection.c create mode 100644 src/test/modules/injection_points/specs/syscache-update-pruned.spec diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index d0515a180d68..ea0a12b39af7 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -49,8 +49,10 @@ #include "storage/predicate.h" #include "storage/procarray.h" #include "utils/datum.h" +#include "utils/injection_point.h" #include "utils/inval.h" #include "utils/spccache.h" +#include "utils/syscache.h" static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, @@ -3254,6 +3256,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, interesting_attrs = bms_add_members(interesting_attrs, id_attrs); block = ItemPointerGetBlockNumber(otid); + INJECTION_POINT("heap_update-before-pin"); buffer = ReadBuffer(relation, block); page = BufferGetPage(buffer); @@ -3269,7 +3272,51 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid)); - Assert(ItemIdIsNormal(lp)); + + /* + * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring + * we see LP_NORMAL here. When the otid origin is a syscache, we may have + * neither a pin nor a snapshot. Hence, we may see other LP_ states, each + * of which indicates concurrent pruning. + * + * Failing with TM_Updated would be most accurate. However, unlike other + * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and + * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted + * does matter to SQL statements UPDATE and MERGE, those SQL statements + * hold a snapshot that ensures LP_NORMAL. Hence, the choice between + * TM_Updated and TM_Deleted affects only the wording of error messages. + * Settle on TM_Deleted, for two reasons. First, it avoids complicating + * the specification of when tmfd->ctid is valid. Second, it creates + * error log evidence that we took this branch. + * + * Since it's possible to see LP_UNUSED at otid, it's also possible to see + * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an + * unrelated row, we'll fail with "duplicate key value violates unique". + * XXX if otid is the live, newer version of the newtup row, we'll discard + * changes originating in versions of this catalog row after the version + * the caller got from syscache. See syscache-update-pruned.spec. + */ + if (!ItemIdIsNormal(lp)) + { + Assert(RelationSupportsSysCache(RelationGetRelid(relation))); + + UnlockReleaseBuffer(buffer); + Assert(!have_tuple_lock); + if (vmbuffer != InvalidBuffer) + ReleaseBuffer(vmbuffer); + tmfd->ctid = *otid; + tmfd->xmax = InvalidTransactionId; + tmfd->cmax = InvalidCommandId; + *update_indexes = TU_None; + + bms_free(hot_attrs); + bms_free(sum_attrs); + bms_free(key_attrs); + bms_free(id_attrs); + /* modified_attrs not yet initialized */ + bms_free(interesting_attrs); + return TM_Deleted; + } /* * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index f41d314eae3f..32cf28bb8bc7 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -123,6 +123,7 @@ #include "storage/sinval.h" #include "storage/smgr.h" #include "utils/catcache.h" +#include "utils/injection_point.h" #include "utils/inval.h" #include "utils/memdebug.h" #include "utils/memutils.h" @@ -1134,6 +1135,8 @@ AtEOXact_Inval(bool isCommit) /* Must be at top of stack */ Assert(transInvalInfo->my_level == 1 && transInvalInfo->parent == NULL); + INJECTION_POINT("AtEOXact_Inval-with-transInvalInfo"); + if (isCommit) { /* diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 09b9b394e0e2..131c050c15f1 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -136,7 +136,8 @@ typedef enum TU_UpdateIndexes * * xmax is the outdating transaction's XID. If the caller wants to visit the * replacement tuple, it must check that this matches before believing the - * replacement is really a match. + * replacement is really a match. This is InvalidTransactionId if the target + * was !LP_NORMAL (expected only for a TID retrieved from syscache). * * cmax is the outdating command's CID, but only when the failure code is * TM_SelfModified (i.e., something in the current transaction outdated the diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile index 0753a9df58c8..4f0161fd33a2 100644 --- a/src/test/modules/injection_points/Makefile +++ b/src/test/modules/injection_points/Makefile @@ -5,7 +5,8 @@ OBJS = \ $(WIN32RES) \ injection_points.o \ injection_stats.o \ - injection_stats_fixed.o + injection_stats_fixed.o \ + regress_injection.o EXTENSION = injection_points DATA = injection_points--1.0.sql PGFILEDESC = "injection_points - facility for injection points" @@ -13,7 +14,7 @@ PGFILEDESC = "injection_points - facility for injection points" REGRESS = injection_points reindex_conc REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress -ISOLATION = basic inplace +ISOLATION = basic inplace syscache-update-pruned TAP_TESTS = 1 diff --git a/src/test/modules/injection_points/expected/syscache-update-pruned.out b/src/test/modules/injection_points/expected/syscache-update-pruned.out new file mode 100644 index 000000000000..5dc5a1ddc5e4 --- /dev/null +++ b/src/test/modules/injection_points/expected/syscache-update-pruned.out @@ -0,0 +1,87 @@ +Parsed test spec with 4 sessions + +starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 wakegrant4 +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + +step at2: <... completed> +step wakeinval4: <... completed> +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + +step grant1: <... completed> +ERROR: tuple concurrently deleted +step wakegrant4: <... completed> + +starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 mkrels4 wakegrant4 +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + +step at2: <... completed> +step wakeinval4: <... completed> +step mkrels4: + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED + +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + +step grant1: <... completed> +ERROR: duplicate key value violates unique constraint "pg_class_oid_index" +step wakegrant4: <... completed> + +starting permutation: snap3 cachefill1 at2 mkrels4 r3 waitprunable4 vac4 grant1 wakeinval4 at4 wakegrant4 inspect4 +step snap3: BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT; +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + +step mkrels4: + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED + +step r3: ROLLBACK; +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + +step at2: <... completed> +step wakeinval4: <... completed> +step at4: ALTER TABLE vactest.child50 INHERIT vactest.orig50; +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + +step grant1: <... completed> +step wakegrant4: <... completed> +step inspect4: + SELECT relhastriggers, relhassubclass FROM pg_class + WHERE oid = 'vactest.orig50'::regclass; + +relhastriggers|relhassubclass +--------------+-------------- +f |f +(1 row) + diff --git a/src/test/modules/injection_points/expected/syscache-update-pruned_1.out b/src/test/modules/injection_points/expected/syscache-update-pruned_1.out new file mode 100644 index 000000000000..b18857c902eb --- /dev/null +++ b/src/test/modules/injection_points/expected/syscache-update-pruned_1.out @@ -0,0 +1,86 @@ +Parsed test spec with 4 sessions + +starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 wakegrant4 +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + +step at2: <... completed> +step wakeinval4: <... completed> +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + +step grant1: <... completed> +step wakegrant4: <... completed> + +starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 mkrels4 wakegrant4 +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + +step at2: <... completed> +step wakeinval4: <... completed> +step mkrels4: + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED + +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + +step grant1: <... completed> +step wakegrant4: <... completed> + +starting permutation: snap3 cachefill1 at2 mkrels4 r3 waitprunable4 vac4 grant1 wakeinval4 at4 wakegrant4 inspect4 +step snap3: BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT; +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + +step mkrels4: + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED + +step r3: ROLLBACK; +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + +step at2: <... completed> +step wakeinval4: <... completed> +step at4: ALTER TABLE vactest.child50 INHERIT vactest.orig50; +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + +step grant1: <... completed> +ERROR: tuple concurrently updated +step wakegrant4: <... completed> +step inspect4: + SELECT relhastriggers, relhassubclass FROM pg_class + WHERE oid = 'vactest.orig50'::regclass; + +relhastriggers|relhassubclass +--------------+-------------- +t |t +(1 row) + diff --git a/src/test/modules/injection_points/injection_points--1.0.sql b/src/test/modules/injection_points/injection_points--1.0.sql index 6c81d55e0d36..c445bf64e623 100644 --- a/src/test/modules/injection_points/injection_points--1.0.sql +++ b/src/test/modules/injection_points/injection_points--1.0.sql @@ -97,3 +97,11 @@ CREATE FUNCTION injection_points_stats_fixed(OUT numattach int8, RETURNS record AS 'MODULE_PATHNAME', 'injection_points_stats_fixed' LANGUAGE C STRICT; + +-- +-- regress_injection.c functions +-- +CREATE FUNCTION removable_cutoff(rel regclass) +RETURNS xid8 +AS 'MODULE_PATHNAME' +LANGUAGE C CALLED ON NULL INPUT; diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build index ebe79fe06a12..259045e5c2d4 100644 --- a/src/test/modules/injection_points/meson.build +++ b/src/test/modules/injection_points/meson.build @@ -8,6 +8,7 @@ injection_points_sources = files( 'injection_points.c', 'injection_stats.c', 'injection_stats_fixed.c', + 'regress_injection.c', ) if host_system == 'windows' @@ -44,8 +45,9 @@ tests += { 'specs': [ 'basic', 'inplace', + 'syscache-update-pruned', ], - 'runningcheck': false, # align with GNU make build system + 'runningcheck': false, # see syscache-update-pruned }, 'tap': { 'env': { diff --git a/src/test/modules/injection_points/regress_injection.c b/src/test/modules/injection_points/regress_injection.c new file mode 100644 index 000000000000..422f4168935f --- /dev/null +++ b/src/test/modules/injection_points/regress_injection.c @@ -0,0 +1,71 @@ +/*-------------------------------------------------------------------------- + * + * regress_injection.c + * Functions supporting test-specific subject matter. + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/test/modules/injection_points/regress_injection.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/table.h" +#include "fmgr.h" +#include "miscadmin.h" +#include "storage/procarray.h" +#include "utils/xid8.h" + +/* + * removable_cutoff - for syscache-update-pruned.spec + * + * Wrapper around GetOldestNonRemovableTransactionId(). In general, this can + * move backward. runningcheck=false isolation tests can reasonably prevent + * that. For the causes of backward movement, see + * postgr.es/m/CAEze2Wj%2BV0kTx86xB_YbyaqTr5hnE_igdWAwuhSyjXBYscf5-Q%40mail.gmail.com + * and the header comment for ComputeXidHorizons(). One can assume this + * doesn't move backward if one arranges for concurrent activity not to reach + * AbortTransaction() and not to allocate an XID while connected to another + * database. Non-runningcheck tests can control most concurrent activity, + * except autovacuum and the isolationtester control connection. Neither + * allocates XIDs, and AbortTransaction() in those would justify test failure. + */ +PG_FUNCTION_INFO_V1(removable_cutoff); +Datum +removable_cutoff(PG_FUNCTION_ARGS) +{ + Relation rel = NULL; + TransactionId xid; + FullTransactionId next_fxid_before, + next_fxid; + + /* could take other relkinds callee takes, but we've not yet needed it */ + if (!PG_ARGISNULL(0)) + rel = table_open(PG_GETARG_OID(0), AccessShareLock); + + /* + * No lock or snapshot necessarily prevents oldestXid from advancing past + * "xid" while this function runs. That concerns us only in that we must + * not ascribe "xid" to the wrong epoch. (That may never arise in + * isolation testing, but let's set a good example.) As a crude solution, + * retry until nextXid doesn't change. + */ + next_fxid = ReadNextFullTransactionId(); + do + { + CHECK_FOR_INTERRUPTS(); + next_fxid_before = next_fxid; + xid = GetOldestNonRemovableTransactionId(rel); + next_fxid = ReadNextFullTransactionId(); + } while (!FullTransactionIdEquals(next_fxid, next_fxid_before)); + + if (rel) + table_close(rel, AccessShareLock); + + PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromAllowableAt(next_fxid, + xid)); +} diff --git a/src/test/modules/injection_points/specs/syscache-update-pruned.spec b/src/test/modules/injection_points/specs/syscache-update-pruned.spec new file mode 100644 index 000000000000..b48e897431e3 --- /dev/null +++ b/src/test/modules/injection_points/specs/syscache-update-pruned.spec @@ -0,0 +1,179 @@ +# Test race conditions involving: +# - s1: heap_update($FROM_SYSCACHE), without a snapshot or pin +# - s2: ALTER TABLE making $FROM_SYSCACHE a dead tuple +# - s3: "VACUUM pg_class" making $FROM_SYSCACHE become LP_UNUSED + +# This is a derivative work of inplace.spec, which exercises the corresponding +# race condition for inplace updates. + +# Despite local injection points, this is incompatible with runningcheck. +# First, removable_cutoff() could move backward, per its header comment. +# Second, other activity could trigger sinval queue overflow, negating our +# efforts to delay inval. Third, this deadlock emerges: +# +# - step at2 waits at an injection point, with interrupts held +# - an unrelated backend waits for at2 to do PROCSIGNAL_BARRIER_SMGRRELEASE +# - step waitprunable4 waits for the unrelated backend to release its xmin + +# The alternative expected output is for -DCATCACHE_FORCE_RELEASE, a setting +# that thwarts testing the race conditions this spec seeks. + + +# Need s2 to make a non-HOT update. Otherwise, "VACUUM pg_class" would leave +# an LP_REDIRECT that persists. To get non-HOT, make rels so the pg_class row +# for vactest.orig50 is on a filled page (assuming BLCKSZ=8192). Just to save +# on filesystem syscalls, use relkind=c for every other rel. +setup +{ + CREATE EXTENSION injection_points; + CREATE SCHEMA vactest; + -- Ensure a leader RELOID catcache entry. PARALLEL RESTRICTED since a + -- parallel worker running pg_relation_filenode() would lack that effect. + CREATE FUNCTION vactest.reloid_catcache_set(regclass) RETURNS int + LANGUAGE sql PARALLEL RESTRICTED + AS 'SELECT 0 FROM pg_relation_filenode($1)'; + CREATE FUNCTION vactest.mkrels(text, int, int) RETURNS void + LANGUAGE plpgsql SET search_path = vactest AS $$ + DECLARE + tname text; + BEGIN + FOR i in $2 .. $3 LOOP + tname := $1 || i; + EXECUTE FORMAT('CREATE TYPE ' || tname || ' AS ()'); + RAISE DEBUG '% at %', tname, ctid + FROM pg_class WHERE oid = tname::regclass; + END LOOP; + END + $$; + CREATE PROCEDURE vactest.wait_prunable() LANGUAGE plpgsql AS $$ + DECLARE + barrier xid8; + cutoff xid8; + BEGIN + barrier := pg_current_xact_id(); + -- autovacuum worker RelationCacheInitializePhase3() or the + -- isolationtester control connection might hold a snapshot that + -- limits pruning. Sleep until that clears. + LOOP + ROLLBACK; -- release MyProc->xmin, which could be the oldest + cutoff := removable_cutoff('pg_class'); + EXIT WHEN cutoff >= barrier; + RAISE LOG 'removable cutoff %; waiting for %', cutoff, barrier; + PERFORM pg_sleep(.1); + END LOOP; + END + $$; +} +setup { CALL vactest.wait_prunable(); -- maximize next two VACUUMs } +setup { VACUUM FULL pg_class; -- reduce free space } +setup { VACUUM FREEZE pg_class; -- populate fsm etc. } +setup +{ + SELECT FROM vactest.mkrels('orig', 1, 49); + CREATE TABLE vactest.orig50 (c int) WITH (autovacuum_enabled = off); + CREATE TABLE vactest.child50 (c int) WITH (autovacuum_enabled = off); + SELECT FROM vactest.mkrels('orig', 51, 100); +} +teardown +{ + DROP SCHEMA vactest CASCADE; + DROP EXTENSION injection_points; +} + +# Wait during GRANT. Disable debug_discard_caches, since we're here to +# exercise an outcome that happens under permissible cache staleness. +session s1 +setup { + SET debug_discard_caches = 0; + SELECT FROM injection_points_set_local(); + SELECT FROM injection_points_attach('heap_update-before-pin', 'wait'); +} +step cachefill1 { SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); } +step grant1 { GRANT SELECT ON vactest.orig50 TO PUBLIC; } + +# Update of the tuple that grant1 will update. Wait before sending invals, so +# s1 will not get a cache miss. Choose the commands for making such updates +# from among those whose heavyweight locking does not conflict with GRANT's +# heavyweight locking. (GRANT will see our XID as committed, so observing +# that XID in the tuple xmax also won't block GRANT.) +session s2 +setup { + SELECT FROM injection_points_set_local(); + SELECT FROM + injection_points_attach('AtEOXact_Inval-with-transInvalInfo', 'wait'); +} +step at2 { + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); +} + +# Hold snapshot to block pruning. +session s3 +step snap3 { BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT; } +step r3 { ROLLBACK; } + +# Non-blocking actions. +session s4 +step waitprunable4 { CALL vactest.wait_prunable(); } +step vac4 { VACUUM pg_class; } +# Reuse the lp that s1 is waiting to change. I've observed reuse at the 1st +# or 18th CREATE, so create excess. +step mkrels4 { + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED +} +step wakegrant4 { + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); +} +step at4 { ALTER TABLE vactest.child50 INHERIT vactest.orig50; } +step wakeinval4 { + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); +} +# Witness effects of steps at2 and/or at4. +step inspect4 { + SELECT relhastriggers, relhassubclass FROM pg_class + WHERE oid = 'vactest.orig50'::regclass; +} + +# TID from syscache becomes LP_UNUSED. Before the bug fix, this permutation +# made s1 fail with "attempted to update invisible tuple" or an assert. +# However, suppose a pd_lsn value such that (pd_lsn.xlogid, pd_lsn.xrecoff) +# passed for (xmin, xmax) with xmin known-committed and xmax known-aborted. +# Persistent page header corruption ensued. For example, s1 overwrote +# pd_lower, pd_upper, and pd_special as though they were t_ctid. +permutation + cachefill1 # reads pg_class tuple T0, xmax invalid + at2 # T0 dead, T1 live + waitprunable4 # T0 prunable + vac4 # T0 becomes LP_UNUSED + grant1 # pauses at heap_update(T0) + wakeinval4(at2) # at2 sends inval message + wakegrant4(grant1) # s1 wakes: "tuple concurrently deleted" + +# add mkrels4: LP_UNUSED becomes a different rel's row +permutation + cachefill1 # reads pg_class tuple T0, xmax invalid + at2 # T0 dead, T1 live + waitprunable4 # T0 prunable + vac4 # T0 becomes LP_UNUSED + grant1 # pauses at heap_update(T0) + wakeinval4(at2) # at2 sends inval message + mkrels4 # T0 becomes a new rel + wakegrant4(grant1) # s1 wakes: "duplicate key value violates unique" + +# TID from syscache becomes LP_UNUSED, then becomes a newer version of the +# original rel's row. +permutation + snap3 # sets MyProc->xmin + cachefill1 # reads pg_class tuple T0, xmax invalid + at2 # T0 dead, T1 live + mkrels4 # T1's page becomes full + r3 # clears MyProc->xmin + waitprunable4 # T0 prunable + vac4 # T0 becomes LP_UNUSED + grant1 # pauses at heap_update(T0) + wakeinval4(at2) # at2 sends inval message + at4 # T1 dead, T0 live + wakegrant4(grant1) # s1 wakes: T0 dead, T2 live + inspect4 # observe loss of at2+at4 changes XXX is an extant bug