diff options
author | Tomas Vondra | 2025-03-29 14:14:47 +0000 |
---|---|---|
committer | Tomas Vondra | 2025-03-29 14:14:49 +0000 |
commit | d70b17636ddf1ea2c71d1c7bc477372b36ccb66b (patch) | |
tree | 9d3be82b51ed0c9c49d42576ad44cf87312f4372 /contrib | |
parent | fb9dff76635d4c32198f30a3cb503588d557d156 (diff) |
amcheck: Move common routines into a separate module
Before performing checks on an index, we need to take some safety
measures that apply to all index AMs. This includes:
* verifying that the index can be checked - Only selected AMs are
supported by amcheck (right now only B-Tree). The index has to be
valid and not a temporary index from another session.
* changing (and then restoring) user's security context
* obtaining proper locks on the index (and table, if needed)
* discarding GUC changes from the index functions
Until now this was implemented in the B-Tree amcheck module, but it's
something every AM will have to do. So relocate the code into a new
module verify_common for reuse.
The shared steps are implemented by amcheck_lock_relation_and_check(),
receiving the AM-specific verification as a callback. Custom parameters
may be supplied using a pointer.
Author: Andrey Borodin <[email protected]>
Reviewed-By: José Villanova <[email protected]>
Reviewed-By: Aleksander Alekseev <[email protected]>
Reviewed-By: Nikolay Samokhvalov <[email protected]>
Reviewed-By: Andres Freund <[email protected]>
Reviewed-By: Tomas Vondra <[email protected]>
Reviewed-By: Mark Dilger <[email protected]>
Reviewed-By: Peter Geoghegan <[email protected]>
Reviewed-By: Kirill Reshke <[email protected]>
Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru
Diffstat (limited to 'contrib')
-rw-r--r-- | contrib/amcheck/Makefile | 1 | ||||
-rw-r--r-- | contrib/amcheck/expected/check_btree.out | 4 | ||||
-rw-r--r-- | contrib/amcheck/meson.build | 1 | ||||
-rw-r--r-- | contrib/amcheck/verify_common.c | 191 | ||||
-rw-r--r-- | contrib/amcheck/verify_common.h | 31 | ||||
-rw-r--r-- | contrib/amcheck/verify_nbtree.c | 267 |
6 files changed, 296 insertions, 199 deletions
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile index 5e9002d2501..c3d70f3369c 100644 --- a/contrib/amcheck/Makefile +++ b/contrib/amcheck/Makefile @@ -3,6 +3,7 @@ MODULE_big = amcheck OBJS = \ $(WIN32RES) \ + verify_common.o \ verify_heapam.o \ verify_nbtree.o diff --git a/contrib/amcheck/expected/check_btree.out b/contrib/amcheck/expected/check_btree.out index e7fb5f55157..c6f4b16c556 100644 --- a/contrib/amcheck/expected/check_btree.out +++ b/contrib/amcheck/expected/check_btree.out @@ -57,8 +57,8 @@ ERROR: could not open relation with OID 17 BEGIN; CREATE INDEX bttest_a_brin_idx ON bttest_a USING brin(id); SELECT bt_index_parent_check('bttest_a_brin_idx'); -ERROR: only B-Tree indexes are supported as targets for verification -DETAIL: Relation "bttest_a_brin_idx" is not a B-Tree index. +ERROR: expected "btree" index as targets for verification +DETAIL: Relation "bttest_a_brin_idx" is a brin index. ROLLBACK; -- normal check outside of xact SELECT bt_index_check('bttest_a_idx'); diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build index 61d7eaf2305..67a4ac8518d 100644 --- a/contrib/amcheck/meson.build +++ b/contrib/amcheck/meson.build @@ -1,6 +1,7 @@ # Copyright (c) 2022-2025, PostgreSQL Global Development Group amcheck_sources = files( + 'verify_common.c', 'verify_heapam.c', 'verify_nbtree.c', ) diff --git a/contrib/amcheck/verify_common.c b/contrib/amcheck/verify_common.c new file mode 100644 index 00000000000..d095e62ce55 --- /dev/null +++ b/contrib/amcheck/verify_common.c @@ -0,0 +1,191 @@ +/*------------------------------------------------------------------------- + * + * verify_common.c + * Utility functions common to all access methods. + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/verify_common.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/table.h" +#include "access/tableam.h" +#include "verify_common.h" +#include "catalog/index.h" +#include "catalog/pg_am.h" +#include "commands/tablecmds.h" +#include "utils/guc.h" +#include "utils/syscache.h" + +static bool amcheck_index_mainfork_expected(Relation rel); + + +/* + * Check if index relation should have a file for its main relation fork. + * Verification uses this to skip unlogged indexes when in hot standby mode, + * where there is simply nothing to verify. + * + * NB: Caller should call index_checkable() before calling here. + */ +static bool +amcheck_index_mainfork_expected(Relation rel) +{ + if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED || + !RecoveryInProgress()) + return true; + + ereport(NOTICE, + (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), + errmsg("cannot verify unlogged index \"%s\" during recovery, skipping", + RelationGetRelationName(rel)))); + + return false; +} + +/* +* Amcheck main workhorse. +* Given index relation OID, lock relation. +* Next, take a number of standard actions: +* 1) Make sure the index can be checked +* 2) change the context of the user, +* 3) keep track of GUCs modified via index functions +* 4) execute callback function to verify integrity. +*/ +void +amcheck_lock_relation_and_check(Oid indrelid, + Oid am_id, + IndexDoCheckCallback check, + LOCKMODE lockmode, + void *state) +{ + Oid heapid; + Relation indrel; + Relation heaprel; + Oid save_userid; + int save_sec_context; + int save_nestlevel; + + /* + * We must lock table before index to avoid deadlocks. However, if the + * passed indrelid isn't an index then IndexGetRelation() will fail. + * Rather than emitting a not-very-helpful error message, postpone + * complaining, expecting that the is-it-an-index test below will fail. + * + * In hot standby mode this will raise an error when parentcheck is true. + */ + heapid = IndexGetRelation(indrelid, true); + if (OidIsValid(heapid)) + { + heaprel = table_open(heapid, lockmode); + + /* + * Switch to the table owner's userid, so that any index functions are + * run as that user. Also lock down security-restricted operations + * and arrange to make GUC variable changes local to this command. + */ + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(heaprel->rd_rel->relowner, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + } + else + { + heaprel = NULL; + /* Set these just to suppress "uninitialized variable" warnings */ + save_userid = InvalidOid; + save_sec_context = -1; + save_nestlevel = -1; + } + + /* + * Open the target index relations separately (like relation_openrv(), but + * with heap relation locked first to prevent deadlocking). In hot + * standby mode this will raise an error when parentcheck is true. + * + * There is no need for the usual indcheckxmin usability horizon test + * here, even in the heapallindexed case, because index undergoing + * verification only needs to have entries for a new transaction snapshot. + * (If this is a parentcheck verification, there is no question about + * committed or recently dead heap tuples lacking index entries due to + * concurrent activity.) + */ + indrel = index_open(indrelid, lockmode); + + /* + * Since we did the IndexGetRelation call above without any lock, it's + * barely possible that a race against an index drop/recreation could have + * netted us the wrong table. + */ + if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("could not open parent table of index \"%s\"", + RelationGetRelationName(indrel)))); + + /* Check that relation suitable for checking */ + if (index_checkable(indrel, am_id)) + check(indrel, heaprel, state, lockmode == ShareLock); + + /* Roll back any GUC changes executed by index functions */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + + /* + * Release locks early. That's ok here because nothing in the called + * routines will trigger shared cache invalidations to be sent, so we can + * relax the usual pattern of only releasing locks after commit. + */ + index_close(indrel, lockmode); + if (heaprel) + table_close(heaprel, lockmode); +} + +/* + * Basic checks about the suitability of a relation for checking as an index. + * + * + * NB: Intentionally not checking permissions, the function is normally not + * callable by non-superusers. If granted, it's useful to be able to check a + * whole cluster. + */ +bool +index_checkable(Relation rel, Oid am_id) +{ + if (rel->rd_rel->relkind != RELKIND_INDEX || + rel->rd_rel->relam != am_id) + { + HeapTuple amtup; + HeapTuple amtuprel; + + amtup = SearchSysCache1(AMOID, ObjectIdGetDatum(am_id)); + amtuprel = SearchSysCache1(AMOID, ObjectIdGetDatum(rel->rd_rel->relam)); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("expected \"%s\" index as targets for verification", NameStr(((Form_pg_am) GETSTRUCT(amtup))->amname)), + errdetail("Relation \"%s\" is a %s index.", + RelationGetRelationName(rel), NameStr(((Form_pg_am) GETSTRUCT(amtuprel))->amname)))); + } + + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"), + errdetail("Index \"%s\" is associated with temporary relation.", + RelationGetRelationName(rel)))); + + if (!rel->rd_index->indisvalid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot check index \"%s\"", + RelationGetRelationName(rel)), + errdetail("Index is not valid."))); + + return amcheck_index_mainfork_expected(rel); +} diff --git a/contrib/amcheck/verify_common.h b/contrib/amcheck/verify_common.h new file mode 100644 index 00000000000..b2565bfbbab --- /dev/null +++ b/contrib/amcheck/verify_common.h @@ -0,0 +1,31 @@ +/*------------------------------------------------------------------------- + * + * amcheck.h + * Shared routines for amcheck verifications. + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/amcheck.h + * + *------------------------------------------------------------------------- + */ +#include "storage/bufpage.h" +#include "storage/lmgr.h" +#include "storage/lockdefs.h" +#include "utils/relcache.h" +#include "miscadmin.h" + +/* Typedefs for callback functions for amcheck_lock_relation */ +typedef void (*IndexCheckableCallback) (Relation index); +typedef void (*IndexDoCheckCallback) (Relation rel, + Relation heaprel, + void *state, + bool readonly); + +extern void amcheck_lock_relation_and_check(Oid indrelid, + Oid am_id, + IndexDoCheckCallback check, + LOCKMODE lockmode, void *state); + +extern bool index_checkable(Relation rel, Oid am_id); diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index d56eb7637d3..f11c43a0ed7 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -30,6 +30,7 @@ #include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" +#include "verify_common.h" #include "catalog/index.h" #include "catalog/pg_am.h" #include "catalog/pg_opfamily_d.h" @@ -159,14 +160,22 @@ typedef struct BtreeLastVisibleEntry ItemPointer tid; /* Heap tid */ } BtreeLastVisibleEntry; +/* + * arguments for the bt_index_check_callback callback + */ +typedef struct BTCallbackState +{ + bool parentcheck; + bool heapallindexed; + bool rootdescend; + bool checkunique; +} BTCallbackState; + PG_FUNCTION_INFO_V1(bt_index_check); PG_FUNCTION_INFO_V1(bt_index_parent_check); -static void bt_index_check_internal(Oid indrelid, bool parentcheck, - bool heapallindexed, bool rootdescend, - bool checkunique); -static inline void btree_index_checkable(Relation rel); -static inline bool btree_index_mainfork_expected(Relation rel); +static void bt_index_check_callback(Relation indrel, Relation heaprel, + void *state, bool readonly); static void bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, bool readonly, bool heapallindexed, bool rootdescend, bool checkunique); @@ -241,15 +250,21 @@ Datum bt_index_check(PG_FUNCTION_ARGS) { Oid indrelid = PG_GETARG_OID(0); - bool heapallindexed = false; - bool checkunique = false; + BTCallbackState args; + + args.heapallindexed = false; + args.rootdescend = false; + args.parentcheck = false; + args.checkunique = false; if (PG_NARGS() >= 2) - heapallindexed = PG_GETARG_BOOL(1); - if (PG_NARGS() == 3) - checkunique = PG_GETARG_BOOL(2); + args.heapallindexed = PG_GETARG_BOOL(1); + if (PG_NARGS() >= 3) + args.checkunique = PG_GETARG_BOOL(2); - bt_index_check_internal(indrelid, false, heapallindexed, false, checkunique); + amcheck_lock_relation_and_check(indrelid, BTREE_AM_OID, + bt_index_check_callback, + AccessShareLock, &args); PG_RETURN_VOID(); } @@ -267,18 +282,23 @@ Datum bt_index_parent_check(PG_FUNCTION_ARGS) { Oid indrelid = PG_GETARG_OID(0); - bool heapallindexed = false; - bool rootdescend = false; - bool checkunique = false; + BTCallbackState args; + + args.heapallindexed = false; + args.rootdescend = false; + args.parentcheck = true; + args.checkunique = false; if (PG_NARGS() >= 2) - heapallindexed = PG_GETARG_BOOL(1); + args.heapallindexed = PG_GETARG_BOOL(1); if (PG_NARGS() >= 3) - rootdescend = PG_GETARG_BOOL(2); - if (PG_NARGS() == 4) - checkunique = PG_GETARG_BOOL(3); + args.rootdescend = PG_GETARG_BOOL(2); + if (PG_NARGS() >= 4) + args.checkunique = PG_GETARG_BOOL(3); - bt_index_check_internal(indrelid, true, heapallindexed, rootdescend, checkunique); + amcheck_lock_relation_and_check(indrelid, BTREE_AM_OID, + bt_index_check_callback, + ShareLock, &args); PG_RETURN_VOID(); } @@ -287,193 +307,46 @@ bt_index_parent_check(PG_FUNCTION_ARGS) * Helper for bt_index_[parent_]check, coordinating the bulk of the work. */ static void -bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, - bool rootdescend, bool checkunique) +bt_index_check_callback(Relation indrel, Relation heaprel, void *state, bool readonly) { - Oid heapid; - Relation indrel; - Relation heaprel; - LOCKMODE lockmode; - Oid save_userid; - int save_sec_context; - int save_nestlevel; - - if (parentcheck) - lockmode = ShareLock; - else - lockmode = AccessShareLock; - - /* - * We must lock table before index to avoid deadlocks. However, if the - * passed indrelid isn't an index then IndexGetRelation() will fail. - * Rather than emitting a not-very-helpful error message, postpone - * complaining, expecting that the is-it-an-index test below will fail. - * - * In hot standby mode this will raise an error when parentcheck is true. - */ - heapid = IndexGetRelation(indrelid, true); - if (OidIsValid(heapid)) - { - heaprel = table_open(heapid, lockmode); - - /* - * Switch to the table owner's userid, so that any index functions are - * run as that user. Also lock down security-restricted operations - * and arrange to make GUC variable changes local to this command. - */ - GetUserIdAndSecContext(&save_userid, &save_sec_context); - SetUserIdAndSecContext(heaprel->rd_rel->relowner, - save_sec_context | SECURITY_RESTRICTED_OPERATION); - save_nestlevel = NewGUCNestLevel(); - RestrictSearchPath(); - } - else - { - heaprel = NULL; - /* Set these just to suppress "uninitialized variable" warnings */ - save_userid = InvalidOid; - save_sec_context = -1; - save_nestlevel = -1; - } + BTCallbackState *args = (BTCallbackState *) state; + bool heapkeyspace, + allequalimage; - /* - * Open the target index relations separately (like relation_openrv(), but - * with heap relation locked first to prevent deadlocking). In hot - * standby mode this will raise an error when parentcheck is true. - * - * There is no need for the usual indcheckxmin usability horizon test - * here, even in the heapallindexed case, because index undergoing - * verification only needs to have entries for a new transaction snapshot. - * (If this is a parentcheck verification, there is no question about - * committed or recently dead heap tuples lacking index entries due to - * concurrent activity.) - */ - indrel = index_open(indrelid, lockmode); - - /* - * Since we did the IndexGetRelation call above without any lock, it's - * barely possible that a race against an index drop/recreation could have - * netted us the wrong table. - */ - if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false)) + if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM)) ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_TABLE), - errmsg("could not open parent table of index \"%s\"", + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" lacks a main relation fork", RelationGetRelationName(indrel)))); - /* Relation suitable for checking as B-Tree? */ - btree_index_checkable(indrel); - - if (btree_index_mainfork_expected(indrel)) + /* Extract metadata from metapage, and sanitize it in passing */ + _bt_metaversion(indrel, &heapkeyspace, &allequalimage); + if (allequalimage && !heapkeyspace) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version", + RelationGetRelationName(indrel)))); + if (allequalimage && !_bt_allequalimage(indrel, false)) { - bool heapkeyspace, - allequalimage; + bool has_interval_ops = false; - if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM)) - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("index \"%s\" lacks a main relation fork", - RelationGetRelationName(indrel)))); - - /* Extract metadata from metapage, and sanitize it in passing */ - _bt_metaversion(indrel, &heapkeyspace, &allequalimage); - if (allequalimage && !heapkeyspace) - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version", - RelationGetRelationName(indrel)))); - if (allequalimage && !_bt_allequalimage(indrel, false)) - { - bool has_interval_ops = false; - - for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(indrel); i++) - if (indrel->rd_opfamily[i] == INTERVAL_BTREE_FAM_OID) - has_interval_ops = true; - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe", - RelationGetRelationName(indrel)), - has_interval_ops - ? errhint("This is known of \"interval\" indexes last built on a version predating 2023-11.") - : 0)); - } - - /* Check index, possibly against table it is an index on */ - bt_check_every_level(indrel, heaprel, heapkeyspace, parentcheck, - heapallindexed, rootdescend, checkunique); + for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(indrel); i++) + if (indrel->rd_opfamily[i] == INTERVAL_BTREE_FAM_OID) + { + has_interval_ops = true; + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe", + RelationGetRelationName(indrel)), + has_interval_ops + ? errhint("This is known of \"interval\" indexes last built on a version predating 2023-11.") + : 0)); + } } - /* Roll back any GUC changes executed by index functions */ - AtEOXact_GUC(false, save_nestlevel); - - /* Restore userid and security context */ - SetUserIdAndSecContext(save_userid, save_sec_context); - - /* - * Release locks early. That's ok here because nothing in the called - * routines will trigger shared cache invalidations to be sent, so we can - * relax the usual pattern of only releasing locks after commit. - */ - index_close(indrel, lockmode); - if (heaprel) - table_close(heaprel, lockmode); -} - -/* - * Basic checks about the suitability of a relation for checking as a B-Tree - * index. - * - * NB: Intentionally not checking permissions, the function is normally not - * callable by non-superusers. If granted, it's useful to be able to check a - * whole cluster. - */ -static inline void -btree_index_checkable(Relation rel) -{ - if (rel->rd_rel->relkind != RELKIND_INDEX || - rel->rd_rel->relam != BTREE_AM_OID) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("only B-Tree indexes are supported as targets for verification"), - errdetail("Relation \"%s\" is not a B-Tree index.", - RelationGetRelationName(rel)))); - - if (RELATION_IS_OTHER_TEMP(rel)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot access temporary tables of other sessions"), - errdetail("Index \"%s\" is associated with temporary relation.", - RelationGetRelationName(rel)))); - - if (!rel->rd_index->indisvalid) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot check index \"%s\"", - RelationGetRelationName(rel)), - errdetail("Index is not valid."))); -} - -/* - * Check if B-Tree index relation should have a file for its main relation - * fork. Verification uses this to skip unlogged indexes when in hot standby - * mode, where there is simply nothing to verify. We behave as if the - * relation is empty. - * - * NB: Caller should call btree_index_checkable() before calling here. - */ -static inline bool -btree_index_mainfork_expected(Relation rel) -{ - if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED || - !RecoveryInProgress()) - return true; - - ereport(DEBUG1, - (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), - errmsg("cannot verify unlogged index \"%s\" during recovery, skipping", - RelationGetRelationName(rel)))); - - return false; + /* Check index, possibly against table it is an index on */ + bt_check_every_level(indrel, heaprel, heapkeyspace, readonly, + args->heapallindexed, args->rootdescend, args->checkunique); } /* |