diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index 5e9002d25018..1b7a63cbaa40 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -3,14 +3,17 @@
MODULE_big = amcheck
OBJS = \
$(WIN32RES) \
+ verify_common.o \
+ verify_gin.o \
verify_heapam.o \
verify_nbtree.o
EXTENSION = amcheck
-DATA = amcheck--1.3--1.4.sql amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql
+DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \
+ amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql
PGFILEDESC = "amcheck - function for verifying relation integrity"
-REGRESS = check check_btree check_heap
+REGRESS = check check_btree check_gin check_heap
EXTRA_INSTALL = contrib/pg_walinspect
TAP_TESTS = 1
diff --git a/contrib/amcheck/amcheck--1.4--1.5.sql b/contrib/amcheck/amcheck--1.4--1.5.sql
new file mode 100644
index 000000000000..445c48ccb7d7
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.4--1.5.sql
@@ -0,0 +1,14 @@
+/* contrib/amcheck/amcheck--1.4--1.5.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.5'" to load this file. \quit
+
+
+-- gin_index_check()
+--
+CREATE FUNCTION gin_index_check(index regclass)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gin_index_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gin_index_check(regclass) FROM PUBLIC;
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index e67ace01c995..c8ba6d7c9bc3 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
# amcheck extension
comment = 'functions for verifying relation integrity'
-default_version = '1.4'
+default_version = '1.5'
module_pathname = '$libdir/amcheck'
relocatable = true
diff --git a/contrib/amcheck/expected/check_btree.out b/contrib/amcheck/expected/check_btree.out
index e7fb5f551574..c6f4b16c5561 100644
--- a/contrib/amcheck/expected/check_btree.out
+++ b/contrib/amcheck/expected/check_btree.out
@@ -57,8 +57,8 @@ ERROR: could not open relation with OID 17
BEGIN;
CREATE INDEX bttest_a_brin_idx ON bttest_a USING brin(id);
SELECT bt_index_parent_check('bttest_a_brin_idx');
-ERROR: only B-Tree indexes are supported as targets for verification
-DETAIL: Relation "bttest_a_brin_idx" is not a B-Tree index.
+ERROR: expected "btree" index as targets for verification
+DETAIL: Relation "bttest_a_brin_idx" is a brin index.
ROLLBACK;
-- normal check outside of xact
SELECT bt_index_check('bttest_a_idx');
diff --git a/contrib/amcheck/expected/check_gin.out b/contrib/amcheck/expected/check_gin.out
new file mode 100644
index 000000000000..93147de0ef11
--- /dev/null
+++ b/contrib/amcheck/expected/check_gin.out
@@ -0,0 +1,78 @@
+-- Test of index bulk load
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+CREATE TABLE "gin_check"("Column1" int[]);
+-- posting trees (frequently used entries)
+INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000;
+-- posting leaves (sparse entries)
+INSERT INTO gin_check select array_agg(255 + round(random()*100)) from generate_series(1, 100) as i group by i % 100;
+CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
+SELECT gin_index_check('gin_check_idx');
+ gin_index_check
+-----------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gin_check;
+-- Test index inserts
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+CREATE TABLE "gin_check"("Column1" int[]);
+CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
+ALTER INDEX gin_check_idx SET (fastupdate = false);
+-- posting trees
+INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000;
+-- posting leaves
+INSERT INTO gin_check select array_agg(100 + round(random()*255)) from generate_series(1, 100) as i group by i % 100;
+SELECT gin_index_check('gin_check_idx');
+ gin_index_check
+-----------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gin_check;
+-- Test GIN over text array
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+CREATE TABLE "gin_check_text_array"("Column1" text[]);
+-- posting trees
+INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300)::text)::text) from generate_series(1, 100000) as i group by i % 10000;
+-- posting leaves
+INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300 + 300)::text)::text) from generate_series(1, 10000) as i group by i % 100;
+CREATE INDEX gin_check_text_array_idx on "gin_check_text_array" USING GIN("Column1");
+SELECT gin_index_check('gin_check_text_array_idx');
+ gin_index_check
+-----------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gin_check_text_array;
+-- Test GIN over jsonb
+CREATE TABLE "gin_check_jsonb"("j" jsonb);
+INSERT INTO gin_check_jsonb values ('{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}');
+INSERT INTO gin_check_jsonb values ('[[14,2,3]]');
+INSERT INTO gin_check_jsonb values ('[1,[14,2,3]]');
+CREATE INDEX "gin_check_jsonb_idx" on gin_check_jsonb USING GIN("j" jsonb_path_ops);
+SELECT gin_index_check('gin_check_jsonb_idx');
+ gin_index_check
+-----------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gin_check_jsonb;
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index 61d7eaf2305d..b33e8c9b062f 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -1,6 +1,8 @@
# Copyright (c) 2022-2025, PostgreSQL Global Development Group
amcheck_sources = files(
+ 'verify_common.c',
+ 'verify_gin.c',
'verify_heapam.c',
'verify_nbtree.c',
)
@@ -24,6 +26,7 @@ install_data(
'amcheck--1.1--1.2.sql',
'amcheck--1.2--1.3.sql',
'amcheck--1.3--1.4.sql',
+ 'amcheck--1.4--1.5.sql',
kwargs: contrib_data_args,
)
@@ -35,6 +38,7 @@ tests += {
'sql': [
'check',
'check_btree',
+ 'check_gin',
'check_heap',
],
},
diff --git a/contrib/amcheck/sql/check_gin.sql b/contrib/amcheck/sql/check_gin.sql
new file mode 100644
index 000000000000..92ddbbc7a891
--- /dev/null
+++ b/contrib/amcheck/sql/check_gin.sql
@@ -0,0 +1,52 @@
+-- Test of index bulk load
+SELECT setseed(1);
+CREATE TABLE "gin_check"("Column1" int[]);
+-- posting trees (frequently used entries)
+INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000;
+-- posting leaves (sparse entries)
+INSERT INTO gin_check select array_agg(255 + round(random()*100)) from generate_series(1, 100) as i group by i % 100;
+CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
+SELECT gin_index_check('gin_check_idx');
+
+-- cleanup
+DROP TABLE gin_check;
+
+-- Test index inserts
+SELECT setseed(1);
+CREATE TABLE "gin_check"("Column1" int[]);
+CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
+ALTER INDEX gin_check_idx SET (fastupdate = false);
+-- posting trees
+INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000;
+-- posting leaves
+INSERT INTO gin_check select array_agg(100 + round(random()*255)) from generate_series(1, 100) as i group by i % 100;
+
+SELECT gin_index_check('gin_check_idx');
+
+-- cleanup
+DROP TABLE gin_check;
+
+-- Test GIN over text array
+SELECT setseed(1);
+CREATE TABLE "gin_check_text_array"("Column1" text[]);
+-- posting trees
+INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300)::text)::text) from generate_series(1, 100000) as i group by i % 10000;
+-- posting leaves
+INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300 + 300)::text)::text) from generate_series(1, 10000) as i group by i % 100;
+CREATE INDEX gin_check_text_array_idx on "gin_check_text_array" USING GIN("Column1");
+SELECT gin_index_check('gin_check_text_array_idx');
+
+-- cleanup
+DROP TABLE gin_check_text_array;
+
+-- Test GIN over jsonb
+CREATE TABLE "gin_check_jsonb"("j" jsonb);
+INSERT INTO gin_check_jsonb values ('{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}');
+INSERT INTO gin_check_jsonb values ('[[14,2,3]]');
+INSERT INTO gin_check_jsonb values ('[1,[14,2,3]]');
+CREATE INDEX "gin_check_jsonb_idx" on gin_check_jsonb USING GIN("j" jsonb_path_ops);
+
+SELECT gin_index_check('gin_check_jsonb_idx');
+
+-- cleanup
+DROP TABLE gin_check_jsonb;
diff --git a/contrib/amcheck/t/002_cic.pl b/contrib/amcheck/t/002_cic.pl
index 0b6a5a9e4641..6a0c4f611258 100644
--- a/contrib/amcheck/t/002_cic.pl
+++ b/contrib/amcheck/t/002_cic.pl
@@ -21,8 +21,9 @@
'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default));
$node->start;
$node->safe_psql('postgres', q(CREATE EXTENSION amcheck));
-$node->safe_psql('postgres', q(CREATE TABLE tbl(i int)));
+$node->safe_psql('postgres', q(CREATE TABLE tbl(i int, j jsonb)));
$node->safe_psql('postgres', q(CREATE INDEX idx ON tbl(i)));
+$node->safe_psql('postgres', q(CREATE INDEX ginidx ON tbl USING gin(j)));
#
# Stress CIC with pgbench.
@@ -40,13 +41,13 @@
{
'002_pgbench_concurrent_transaction' => q(
BEGIN;
- INSERT INTO tbl VALUES(0);
+ INSERT INTO tbl VALUES(0, '{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}');
COMMIT;
),
'002_pgbench_concurrent_transaction_savepoints' => q(
BEGIN;
SAVEPOINT s1;
- INSERT INTO tbl VALUES(0);
+ INSERT INTO tbl VALUES(0, '[[14,2,3]]');
COMMIT;
),
'002_pgbench_concurrent_cic' => q(
@@ -54,7 +55,10 @@
\if :gotlock
DROP INDEX CONCURRENTLY idx;
CREATE INDEX CONCURRENTLY idx ON tbl(i);
+ DROP INDEX CONCURRENTLY ginidx;
+ CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j);
SELECT bt_index_check('idx',true);
+ SELECT gin_index_check('ginidx');
SELECT pg_advisory_unlock(42);
\endif
)
diff --git a/contrib/amcheck/t/003_cic_2pc.pl b/contrib/amcheck/t/003_cic_2pc.pl
index 9134487f3b49..00a446a381fa 100644
--- a/contrib/amcheck/t/003_cic_2pc.pl
+++ b/contrib/amcheck/t/003_cic_2pc.pl
@@ -25,7 +25,7 @@
'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default));
$node->start;
$node->safe_psql('postgres', q(CREATE EXTENSION amcheck));
-$node->safe_psql('postgres', q(CREATE TABLE tbl(i int)));
+$node->safe_psql('postgres', q(CREATE TABLE tbl(i int, j jsonb)));
#
@@ -41,7 +41,7 @@
$main_h->query_safe(
q(
BEGIN;
-INSERT INTO tbl VALUES(0);
+INSERT INTO tbl VALUES(0, '[[14,2,3]]');
));
my $cic_h = $node->background_psql('postgres');
@@ -50,6 +50,7 @@
qr/start/, q(
\echo start
CREATE INDEX CONCURRENTLY idx ON tbl(i);
+CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j);
));
$main_h->query_safe(
@@ -60,7 +61,7 @@
$main_h->query_safe(
q(
BEGIN;
-INSERT INTO tbl VALUES(0);
+INSERT INTO tbl VALUES(0, '[[14,2,3]]');
));
$node->safe_psql('postgres', q(COMMIT PREPARED 'a';));
@@ -69,7 +70,7 @@
q(
PREPARE TRANSACTION 'b';
BEGIN;
-INSERT INTO tbl VALUES(0);
+INSERT INTO tbl VALUES(0, '"mary had a little lamb"');
));
$node->safe_psql('postgres', q(COMMIT PREPARED 'b';));
@@ -86,6 +87,9 @@
$result = $node->psql('postgres', q(SELECT bt_index_check('idx',true)));
is($result, '0', 'bt_index_check after overlapping 2PC');
+$result = $node->psql('postgres', q(SELECT gin_index_check('ginidx')));
+is($result, '0', 'gin_index_check after overlapping 2PC');
+
#
# Server restart shall not change whether prepared xact blocks CIC
@@ -94,7 +98,7 @@
$node->safe_psql(
'postgres', q(
BEGIN;
-INSERT INTO tbl VALUES(0);
+INSERT INTO tbl VALUES(0, '{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}');
PREPARE TRANSACTION 'spans_restart';
BEGIN;
CREATE TABLE unused ();
@@ -108,12 +112,16 @@
\echo start
DROP INDEX CONCURRENTLY idx;
CREATE INDEX CONCURRENTLY idx ON tbl(i);
+DROP INDEX CONCURRENTLY ginidx;
+CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j);
));
$node->safe_psql('postgres', "COMMIT PREPARED 'spans_restart'");
$reindex_h->quit;
$result = $node->psql('postgres', q(SELECT bt_index_check('idx',true)));
is($result, '0', 'bt_index_check after 2PC and restart');
+$result = $node->psql('postgres', q(SELECT gin_index_check('ginidx')));
+is($result, '0', 'gin_index_check after 2PC and restart');
#
@@ -136,14 +144,14 @@
{
'003_pgbench_concurrent_2pc' => q(
BEGIN;
- INSERT INTO tbl VALUES(0);
+ INSERT INTO tbl VALUES(0,'null');
PREPARE TRANSACTION 'c:client_id';
COMMIT PREPARED 'c:client_id';
),
'003_pgbench_concurrent_2pc_savepoint' => q(
BEGIN;
SAVEPOINT s1;
- INSERT INTO tbl VALUES(0);
+ INSERT INTO tbl VALUES(0,'[false, "jnvaba", -76, 7, {"_": [1]}, 9]');
PREPARE TRANSACTION 'c:client_id';
COMMIT PREPARED 'c:client_id';
),
@@ -163,7 +171,25 @@
SELECT bt_index_check('idx',true);
SELECT pg_advisory_unlock(42);
\endif
+ ),
+ '005_pgbench_concurrent_cic' => q(
+ SELECT pg_try_advisory_lock(42)::integer AS gotginlock \gset
+ \if :gotginlock
+ DROP INDEX CONCURRENTLY ginidx;
+ CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j);
+ SELECT gin_index_check('ginidx');
+ SELECT pg_advisory_unlock(42);
+ \endif
+ ),
+ '006_pgbench_concurrent_ric' => q(
+ SELECT pg_try_advisory_lock(42)::integer AS gotginlock \gset
+ \if :gotginlock
+ REINDEX INDEX CONCURRENTLY ginidx;
+ SELECT gin_index_check('ginidx');
+ SELECT pg_advisory_unlock(42);
+ \endif
)
+
});
$node->stop;
diff --git a/contrib/amcheck/t/006_gin_concurrency.pl b/contrib/amcheck/t/006_gin_concurrency.pl
new file mode 100644
index 000000000000..afc67940d4dd
--- /dev/null
+++ b/contrib/amcheck/t/006_gin_concurrency.pl
@@ -0,0 +1,196 @@
+
+# Copyright (c) 2021-2025, PostgreSQL Global Development Group
+
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+
+use Test::More;
+
+my $node;
+
+#
+# Test set-up
+#
+$node = PostgreSQL::Test::Cluster->new('test');
+$node->init;
+$node->append_conf('postgresql.conf',
+ 'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default));
+$node->start;
+$node->safe_psql('postgres', q(CREATE EXTENSION amcheck));
+$node->safe_psql('postgres', q(CREATE TABLE tbl(i integer[], j jsonb, k jsonb)));
+$node->safe_psql('postgres', q(CREATE INDEX ginidx ON tbl USING gin(i, j, k)));
+$node->safe_psql('postgres', q(CREATE TABLE jsondata (i serial, j jsonb)));
+$node->safe_psql('postgres', q(INSERT INTO jsondata (j) VALUES
+ ('1'),
+ ('91'),
+ ('[5]'),
+ ('true'),
+ ('"zxI"'),
+ ('[1, 7]'),
+ ('["", 4]'),
+ ('"utDFBz"'),
+ ('[[9], ""]'),
+ ('"eCvxKPML"'),
+ ('["1VMQNQM"]'),
+ ('{"": "562c"}'),
+ ('[58, 8, null]'),
+ ('{"": {"": 62}}'),
+ ('["", 6, 19, ""]'),
+ ('{"ddfWTQ": true}'),
+ ('["", 734.2, 9, 5]'),
+ ('"GMV27mjtuuqmlltw"'),
+ ('{"dabe": -5, "": 6}'),
+ ('"hgihykirQGIYTcCA30"'),
+ ('[9, {"Utrn": -6}, ""]'),
+ ('"BJTZUMST1_WWEgyqgka_"'),
+ ('["", -4, "", [-2], -47]'),
+ ('{"": [3], "": {"": "y"}}'),
+ ('{"myuijj": "YUWIUZXXLGS"}'),
+ ('{"3": false, "C": "1sHTX"}'),
+ ('"ZGUORVDE_ACF1QXJ_hipgwrks"'),
+ ('{"072": [3, -4], "oh": "eL"}'),
+ ('[{"de": 9, "JWHPMRZJW": [0]}]'),
+ ('"EACJUZEBAFFBEE6706SZLWVGO635"'),
+ ('["P", {"TZW": [""]}, {"": [0]}]'),
+ ('{"": -6, "YMb": -22, "__": [""]}'),
+ ('{"659": [8], "bfc": [0], "V": ""}'),
+ ('{"8776": "1tryl", "Q": 2, "": 4.6}'),
+ ('[[1], "", 9, 0, [1, 0], -1, 0, "C"]'),
+ ('"635321pnpjlfFzhGTIYP9265iA_19D8260"'),
+ ('"klmxsoCFDtzxrhotsqlnmvmzlcbdde34twj"'),
+ ('"GZSXSZVS19ecbe_ZJJED0379c1j9_GSU9167"'),
+ ('{"F18s": {"": -84194}, "ececab2": [""]}'),
+ ('["", {"SVAvgg": "Q"}, 1, 9, "gypy", [1]]'),
+ ('[[""], {"": 5}, "GVZGGVGSWM", 2, ["", 8]]'),
+ ('{"V": 8, "TPNL": [826, null], "4": -9.729}'),
+ ('{"HTJP_DAptxn6": 9, "": "r", "hji4124": ""}'),
+ ('[1, ["9", 5, 6, ""], {"": "", "": "efb"}, 7]'),
+ ('{"": 6, "1251e_cajrgkyzuxBEDM017444EFD": 548}'),
+ ('{"853": -60, "TGLUG_jxmrggv": null, "pjx": ""}'),
+ ('[0, "wsgnnvCfJVV_KOMLVXOUIS9FIQLPXXBbbaohjrpj"]'),
+ ('"nizvkl36908OLW22ecbdeEBMHMiCEEACcikwkjpmu30X_m"'),
+ ('{"bD24eeVZWY": 1, "Bt": 9, "": 6052, "FT": ["h"]}'),
+ ('"CDBnouyzlAMSHJCtguxxizpzgkNYfaNLURVITNLYVPSNLYNy"'),
+ ('{"d": [[4, "N"], null, 6, true], "1PKV": 6, "9": 6}'),
+ ('[-7326, [83, 55], -63, [0, {"": 1}], {"ri0": false}]'),
+ ('{"": 117.38, "FCkx3608szztpvjolomzvlyrshyvrgz": -4.2}'),
+ ('["", 8, {"WXHNG": {"6": 4}}, [null], 7, 2, "", 299, 6]'),
+ ('[[-992.2, "TPm", "", "cedeff79BD8", "t", [1]], 0, [-7]]'),
+ ('[9, 34, ["LONuyiYGQZ"], [7, 88], ["c"], 1, 6, "", [[2]]]'),
+ ('[20, 5, null, "eLHTXRWNV", 8, ["pnpvrum", -3], "FINY", 3]'),
+ ('[{"": "", "b": 2, "d": "egu"}, "aPNK", 2, 9, {"": -79946}]'),
+ ('[1, {"769": 9}, 5, 9821, 22, 0, 2.7, 5, 4, 191, 54.599, 24]'),
+ ('["c", 77, "b_0lplvHJNLMxw", "VN76dhFadaafadfe5dfbco", false]'),
+ ('"TYIHXebbPK_86QMP_199bEEIS__8205986vdC_CFAEFBFCEFCJQRHYoqztv"'),
+ ('"cdmxxxzrhtxpwuyrxinmhb5577NSPHIHMTPQYTXSUVVGJPUUMCBEDb_1569e"'),
+ ('[[5, null, "C"], "ORNR", "mnCb", 1, -800, "6953", ["K", 0], ""]'),
+ ('"SSKLTHJxjxywwquhiwsde353eCIJJjkyvn9946c2cdVadcboiyZFAYMHJWGMMT"'),
+ ('"5185__D5AtvhizvmEVceF3jxtghlCF0789_owmsztJHRMOJ7rlowxqq51XLXJbF"'),
+ ('{"D": 565206, "xupqtmfedff": "ZGJN9", "9": 1, "glzv": -47, "": -8}'),
+ ('{"": 9, "": {"": [null], "ROP": 842}, "": ["5FFD", 7, 5, 1, 94, 1]}'),
+ ('{"JLn": ["8s"], "": "_ahxizrzhivyzvhr", "XSAt": 5, "P": 2838, "": 5}'),
+ ('[51, 3, {"": 9, "": -9, "": [[6]]}, 7, 7, {"": 0}, "TXLQL", 7.6, [7]]'),
+ ('[-38.7, "kre40", 5, {"": null}, "tvuv", 8, "", "", "uizygprwwvh", "1"]'),
+ ('"z934377_nxmzjnuqglgyukjteefeihjyot1irkvwnnrqinptlpzwjgmkjbQMUVxxwvbdz"'),
+ ('[165.9, "dAFD_60JQPYbafh", false, {"": 6, "": "fcfd"}, [[2], "c"], 4, 2]'),
+ ('"ffHOOPVSSACDqiyeecTNWJMWPNRXU283aHRXNUNZZZQPUGYSQTTQXQVJM5eeafcIPGIHcac"'),
+ ('[2, 8, -53, {"": 5}, "F9", 8, "SGUJPNVI", "7OLOZH", 9.84, {"": 6}, 207, 6]'),
+ ('"xqmqmyljhq__ZGWJVNefagsxrsktruhmlinhxloupuVQW0804901NKGGMNNSYYXWQOosz8938"'),
+ ('{"FEoLfaab1160167": {"L": [42, 0]}, "938": "FCCUPGYYYMQSQVZJKM", "knqmk": 2}'),
+ ('"0igyurmOMSXIYHSZQEAcxlvgqdxkhwtrbaabfaaMC138Z_BDRLrythpi30_MPRXMTOILRLswmoy"'),
+ ('"1129BBCABFFAACA9VGVKipnwohaccc9TSIMTOQKHmcGYVeFE_PWKLHmpyj60137672qugtsstugg"'),
+ ('"D3BDA069074174vx48A37IVHWVXLUP9382542ypsl1465pixtryzCBgrkkhrvCC_BDDFatkyXHLIe"'),
+ ('[{"esx7": -53, "ec60834YGVMYoXAAvgxmmqnojyzmiklhdovFipl": 2, "os": 66433}, 9.13]'),
+ ('{"": ["", 4, null, 5, null], "": "3", "5_GMMHTIhPB_F_vsebc1": "Er", "GY": 121.32}'),
+ ('["krTVPYDEd", 5, 8, [6, -6], [[-9], 3340, [[""]]], "", 5, [6, true], 3, "", 1, ""]'),
+ ('{"rBNPKN8446080wruOLeceaCBDCKWNUYYMONSJUlCDFExr": {"": "EE0", "6826": 5, "": 7496}}'),
+ ('[3, {"": -8}, "101dboMVSNKZLVPITLHLPorwwuxxjmjsh", "", "LSQPRVYKWVYK945imrh", 4, 51]'),
+ ('[["HY6"], "", "bcdB", [2, [85, 1], 3, 3, 3, [8]], "", ["_m"], "2", -33, 8, 3, "_xwj"]'),
+ ('["", 0, -3.7, 8, false, null, {"": 5}, 9, "06FccxFcdb283bbZGGVRSMWLJH2_PBAFpwtkbceto"]'),
+ ('[52, "", -39, -7, [1], "c", {"": 9, "": 45528, "G": {"": 7}}, 3, false, 0, "EB", 8, -6]'),
+ ('"qzrkvrlG78CCCEBCptzwwok808805243QXVSYed3efZSKLSNXPxhrS357KJMWSKgrfcFFDFDWKSXJJSIJ_yqJu"'),
+ ('[43, 8, {"": ""}, "uwtv__HURKGJLGGPPW", 9, 66, "yqrvghxuw", {"J": false}, false, 2, 0, 4]'),
+ ('[{"UVL": 7, "": 1}, false, [6, "H"], "boxlgqgm", 3, "znhm", [true], 0, ["e", 3.7], 9, 9.4]'),
+ ('{"825634870117somzqw": 1, "": [5], "gYH": "_XT", "b22412631709RZP": 3, "": "", "FDB": [""]}'),
+ ('[8, ["_bae"], "", "WN", 80, {"o": 2, "aff": 16}, false, true, 4, 6, {"nutzkzikolsxZRQ": 30}]'),
+ ('["588BD9c_xzsn", {"k": 0, "_Ecezlkslrwvjpwrukiqzl": 3, "Ej": "4"}, "TUXwghn1dTNRXJZpswmD", 5]'),
+ ('[{"dC": 7}, {"": 1, "4": 41, "": "", "": "adKS"}, {"": "ypv"}, 6, 9, 2, [-61.46], [1, 3.9], 2]'),
+ ('{"8": 8, "": -364, "855": -238.1, "zj": 9, "SNHJG413": 3, "UMNVI73": [60, 0], "iwvqse": -1.833}'),
+ ('"VTUKMLZKQPHIEniCFZ_cjrhvspxzulvxhqykjzmrw89OGOGISWdcrvpOPLOFALGK809896999xzqnkm63254_xrmcfcedb"'),
+ ('["", "USNQbcexyFDCdBAFWJIphloxwytplyZZR008400FmoiYXVYOHVGV79795644463Aug_aeoDDEjzoziisxoykuijhz"]'),
+ ('{"": 1, "5abB58gXVQVTTMWU3jSHXMMNV": "", "nv": 934, "kjsnhtj": 8, "": [{"xm": [71, 425]}], "": -9}'),
+ ('"__oliqCcbwwyqmtECsqivplcb1NTMOQRZTYRJONOIPWNHKWLJRIHKROMJNZLNGTTKRcedebccdbMTQXSzhynxmllqxuhnxBA_"'),
+ ('["thgACBWGNGMkFFEA", [0, -1349, {"18": "RM", "F3": 6, "dP": "_AF"}, 64, 0, {"f": [8]}], 5, [[0]], 2]')
+));
+
+#
+# Stress gin with pgbench.
+#
+# Modify the table data, and hence the index data, from multiple process
+# while from other processes run the index checking code. This should,
+# if the index is large enough, result in the checks performing across
+# concurrent page splits.
+#
+$node->pgbench(
+ '--no-vacuum --client=20 --transactions=5000',
+ 0,
+ [qr{actually processed}],
+ [qr{^$}],
+ 'concurrent DML and index checking',
+ {
+ '006_gin_concurrency_insert_1' => q(
+ INSERT INTO tbl (i, j, k)
+ (SELECT ARRAY[x.i, y.i, random(0,100000), random(0,100000)], x.j, y.j
+ FROM jsondata x, jsondata y
+ WHERE x.i = random(1,100)
+ AND y.i = random(1,100)
+ )
+ ),
+ '006_gin_concurrency_insert_2' => q(
+ INSERT INTO tbl (i, j, k)
+ (SELECT gs.i, j.j, j.j || j.j
+ FROM jsondata j,
+ (SELECT array_agg(gs) AS i FROM generate_series(random(0,100), random(101,200)) gs) gs
+ WHERE j.i = random(1,100)
+ )
+ ),
+ '006_gin_concurrency_insert_nulls' => q(
+ INSERT INTO tbl (i, j, k) VALUES
+ (null, null, null),
+ (null, null, '[]'),
+ (null, '[]', null),
+ (ARRAY[]::INTEGER[], null, null),
+ (null, '[]', '[]'),
+ (ARRAY[]::INTEGER[], '[]', null),
+ (ARRAY[]::INTEGER[], '[]', '[]')
+ ),
+ '006_gin_concurrency_update_i' => q(
+ UPDATE tbl
+ SET i = (SELECT i || i FROM tbl ORDER BY random() LIMIT 1)
+ WHERE j = (SELECT j FROM tbl ORDER BY random() LIMIT 1);
+ ),
+ '006_gin_concurrency_update_j' => q(
+ UPDATE tbl
+ SET j = (SELECT j || j FROM tbl ORDER BY random() LIMIT 1)
+ WHERE k = (SELECT k FROM tbl ORDER BY random() LIMIT 1);
+ ),
+ '006_gin_concurrency_update_k' => q(
+ UPDATE tbl
+ SET k = (SELECT k || k FROM tbl ORDER BY random() LIMIT 1)
+ WHERE i = (SELECT i FROM tbl ORDER BY random() LIMIT 1);
+ ),
+ '006_gin_concurrency_delete' => q(
+ DELETE FROM tbl
+ WHERE random(1,5) = 3;
+ ),
+ '006_gin_concurrency_gin_index_check' => q(
+ SELECT gin_index_check('ginidx');
+ )
+ });
+
+$node->stop;
+done_testing();
+
diff --git a/contrib/amcheck/verify_common.c b/contrib/amcheck/verify_common.c
new file mode 100644
index 000000000000..d095e62ce551
--- /dev/null
+++ b/contrib/amcheck/verify_common.c
@@ -0,0 +1,191 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_common.c
+ * Utility functions common to all access methods.
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_common.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/table.h"
+#include "access/tableam.h"
+#include "verify_common.h"
+#include "catalog/index.h"
+#include "catalog/pg_am.h"
+#include "commands/tablecmds.h"
+#include "utils/guc.h"
+#include "utils/syscache.h"
+
+static bool amcheck_index_mainfork_expected(Relation rel);
+
+
+/*
+ * Check if index relation should have a file for its main relation fork.
+ * Verification uses this to skip unlogged indexes when in hot standby mode,
+ * where there is simply nothing to verify.
+ *
+ * NB: Caller should call index_checkable() before calling here.
+ */
+static bool
+amcheck_index_mainfork_expected(Relation rel)
+{
+ if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED ||
+ !RecoveryInProgress())
+ return true;
+
+ ereport(NOTICE,
+ (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
+ errmsg("cannot verify unlogged index \"%s\" during recovery, skipping",
+ RelationGetRelationName(rel))));
+
+ return false;
+}
+
+/*
+ * Amcheck main workhorse.
+ *
+ * Given an index relation OID, lock the relation, then:
+ * 1) make sure the index can be checked;
+ * 2) switch to the table owner's userid;
+ * 3) track GUCs modified via index functions;
+ * 4) execute the callback function to verify integrity.
+ */
+void
+amcheck_lock_relation_and_check(Oid indrelid,
+ Oid am_id,
+ IndexDoCheckCallback check,
+ LOCKMODE lockmode,
+ void *state)
+{
+ Oid heapid;
+ Relation indrel;
+ Relation heaprel;
+ Oid save_userid;
+ int save_sec_context;
+ int save_nestlevel;
+
+ /*
+ * We must lock table before index to avoid deadlocks. However, if the
+ * passed indrelid isn't an index then IndexGetRelation() will fail.
+ * Rather than emitting a not-very-helpful error message, postpone
+ * complaining, expecting that the is-it-an-index test below will fail.
+ *
+ * In hot standby mode this will raise an error when parentcheck is true.
+ */
+ heapid = IndexGetRelation(indrelid, true);
+ if (OidIsValid(heapid))
+ {
+ heaprel = table_open(heapid, lockmode);
+
+ /*
+ * Switch to the table owner's userid, so that any index functions are
+ * run as that user. Also lock down security-restricted operations
+ * and arrange to make GUC variable changes local to this command.
+ */
+ GetUserIdAndSecContext(&save_userid, &save_sec_context);
+ SetUserIdAndSecContext(heaprel->rd_rel->relowner,
+ save_sec_context | SECURITY_RESTRICTED_OPERATION);
+ save_nestlevel = NewGUCNestLevel();
+ }
+ else
+ {
+ heaprel = NULL;
+ /* Set these just to suppress "uninitialized variable" warnings */
+ save_userid = InvalidOid;
+ save_sec_context = -1;
+ save_nestlevel = -1;
+ }
+
+ /*
+ * Open the target index relations separately (like relation_openrv(), but
+ * with heap relation locked first to prevent deadlocking). In hot
+ * standby mode this will raise an error when parentcheck is true.
+ *
+ * There is no need for the usual indcheckxmin usability horizon test
+ * here, even in the heapallindexed case, because index undergoing
+ * verification only needs to have entries for a new transaction snapshot.
+ * (If this is a parentcheck verification, there is no question about
+ * committed or recently dead heap tuples lacking index entries due to
+ * concurrent activity.)
+ */
+ indrel = index_open(indrelid, lockmode);
+
+ /*
+ * Since we did the IndexGetRelation call above without any lock, it's
+ * barely possible that a race against an index drop/recreation could have
+ * netted us the wrong table.
+ */
+ if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_TABLE),
+ errmsg("could not open parent table of index \"%s\"",
+ RelationGetRelationName(indrel))));
+
+ /* Check that the relation is suitable for checking */
+ if (index_checkable(indrel, am_id))
+ check(indrel, heaprel, state, lockmode == ShareLock);
+
+ /* Roll back any GUC changes executed by index functions */
+ AtEOXact_GUC(false, save_nestlevel);
+
+ /* Restore userid and security context */
+ SetUserIdAndSecContext(save_userid, save_sec_context);
+
+ /*
+ * Release locks early. That's ok here because nothing in the called
+ * routines will trigger shared cache invalidations to be sent, so we can
+ * relax the usual pattern of only releasing locks after commit.
+ */
+ index_close(indrel, lockmode);
+ if (heaprel)
+ table_close(heaprel, lockmode);
+}
+
+/*
+ * Basic checks about the suitability of a relation for checking as an
+ * index.
+ *
+ * NB: Intentionally not checking permissions, the function is normally not
+ * callable by non-superusers. If granted, it's useful to be able to check a
+ * whole cluster.
+ */
+bool
+index_checkable(Relation rel, Oid am_id)
+{
+ if (rel->rd_rel->relkind != RELKIND_INDEX ||
+ rel->rd_rel->relam != am_id)
+ {
+ HeapTuple amtup;
+ HeapTuple amtuprel;
+
+ amtup = SearchSysCache1(AMOID, ObjectIdGetDatum(am_id));
+ amtuprel = SearchSysCache1(AMOID, ObjectIdGetDatum(rel->rd_rel->relam));
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("expected \"%s\" index as targets for verification", NameStr(((Form_pg_am) GETSTRUCT(amtup))->amname)),
+ errdetail("Relation \"%s\" is a %s index.",
+ RelationGetRelationName(rel), NameStr(((Form_pg_am) GETSTRUCT(amtuprel))->amname))));
+ }
+
+ if (RELATION_IS_OTHER_TEMP(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot access temporary tables of other sessions"),
+ errdetail("Index \"%s\" is associated with temporary relation.",
+ RelationGetRelationName(rel))));
+
+ if (!rel->rd_index->indisvalid)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot check index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail("Index is not valid.")));
+
+ return amcheck_index_mainfork_expected(rel);
+}
diff --git a/contrib/amcheck/verify_common.h b/contrib/amcheck/verify_common.h
new file mode 100644
index 000000000000..b2565bfbbab8
--- /dev/null
+++ b/contrib/amcheck/verify_common.h
@@ -0,0 +1,35 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_common.h
+ * Shared routines for amcheck verifications.
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_common.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef VERIFY_COMMON_H
+#define VERIFY_COMMON_H
+
+#include "miscadmin.h"
+#include "storage/bufpage.h"
+#include "storage/lmgr.h"
+#include "storage/lockdefs.h"
+#include "utils/relcache.h"
+
+/* Typedefs for callback functions for amcheck_lock_relation_and_check */
+typedef void (*IndexCheckableCallback) (Relation index);
+typedef void (*IndexDoCheckCallback) (Relation rel,
+ Relation heaprel,
+ void *state,
+ bool readonly);
+
+extern void amcheck_lock_relation_and_check(Oid indrelid,
+ Oid am_id,
+ IndexDoCheckCallback check,
+ LOCKMODE lockmode, void *state);
+
+extern bool index_checkable(Relation rel, Oid am_id);
+#endif							/* VERIFY_COMMON_H */
diff --git a/contrib/amcheck/verify_gin.c b/contrib/amcheck/verify_gin.c
new file mode 100644
index 000000000000..670f53637d47
--- /dev/null
+++ b/contrib/amcheck/verify_gin.c
@@ -0,0 +1,798 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gin.c
+ * Verifies the integrity of GIN indexes based on invariants.
+ *
+ *
+ * GIN index verification checks a number of invariants:
+ *
+ * - consistency: Paths in GIN graph have to contain consistent keys: tuples
+ * on parent pages consistently include tuples from children pages.
+ *
+ * - graph invariants: Each internal page must have at least one downlink, and
+ * can reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_gin.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gin_private.h"
+#include "access/nbtree.h"
+#include "catalog/pg_am.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "verify_common.h"
+#include "string.h"
+
+/*
+ * GinScanItem represents one item of depth-first scan of the index.
+ */
+typedef struct GinScanItem
+{
+ int depth;
+ IndexTuple parenttup;
+ BlockNumber parentblk;
+ XLogRecPtr parentlsn;
+ BlockNumber blkno;
+ struct GinScanItem *next;
+} GinScanItem;
+
+/*
+ * GinPostingTreeScanItem represents one item of a depth-first posting tree scan.
+ */
+typedef struct GinPostingTreeScanItem
+{
+ int depth;
+ ItemPointerData parentkey;
+ BlockNumber parentblk;
+ BlockNumber blkno;
+ struct GinPostingTreeScanItem *next;
+} GinPostingTreeScanItem;
+
+
+PG_FUNCTION_INFO_V1(gin_index_check);
+
+static void gin_check_parent_keys_consistency(Relation rel,
+ Relation heaprel,
+ void *callback_state, bool readonly);
+static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gin_refind_parent(Relation rel,
+ BlockNumber parentblkno,
+ BlockNumber childblkno,
+ BufferAccessStrategy strategy);
+static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+ OffsetNumber offset);
+
+/*
+ * gin_index_check(index regclass)
+ *
+ * Verify integrity of GIN index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum
+gin_index_check(PG_FUNCTION_ARGS)
+{
+ Oid indrelid = PG_GETARG_OID(0);
+
+ amcheck_lock_relation_and_check(indrelid,
+ GIN_AM_OID,
+ gin_check_parent_keys_consistency,
+ AccessShareLock,
+ NULL);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Read item pointers from leaf entry tuple.
+ *
+ * Returns a palloc'd array of ItemPointers. The number of items is returned
+ * in *nitems.
+ */
+static ItemPointer
+ginReadTupleWithoutState(IndexTuple itup, int *nitems)
+{
+ Pointer ptr = GinGetPosting(itup);
+ int nipd = GinGetNPosting(itup);
+ ItemPointer ipd;
+ int ndecoded;
+
+ if (GinItupIsCompressed(itup))
+ {
+ if (nipd > 0)
+ {
+ ipd = ginPostingListDecode((GinPostingList *) ptr, &ndecoded);
+ if (nipd != ndecoded)
+ elog(ERROR, "number of items mismatch in GIN entry tuple, %d in tuple header, %d decoded",
+ nipd, ndecoded);
+ }
+ else
+ ipd = palloc(0);
+ }
+ else
+ {
+ ipd = (ItemPointer) palloc(sizeof(ItemPointerData) * nipd);
+ memcpy(ipd, ptr, sizeof(ItemPointerData) * nipd);
+ }
+ *nitems = nipd;
+ return ipd;
+}
+
+/*
+ * Scans through a posting tree (given by the root), and verifies that the keys
+ * on child pages are consistent with the parent.
+ *
+ * Allocates a separate memory context and scans through posting tree graph.
+ */
+static void
+gin_check_posting_tree_parent_keys_consistency(Relation rel, BlockNumber posting_tree_root)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GinPostingTreeScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+
+ int leafdepth;
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "posting tree check context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ leafdepth = -1;
+
+ /* Start the scan at the root page */
+ stack = (GinPostingTreeScanItem *) palloc0(sizeof(GinPostingTreeScanItem));
+ stack->depth = 0;
+ ItemPointerSetInvalid(&stack->parentkey);
+ stack->parentblk = InvalidBlockNumber;
+ stack->blkno = posting_tree_root;
+
+ elog(DEBUG3, "processing posting tree at blk %u", posting_tree_root);
+
+ while (stack)
+ {
+ GinPostingTreeScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ OffsetNumber i,
+ maxoff;
+ BlockNumber rightlink;
+
+ CHECK_FOR_INTERRUPTS();
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIN_SHARE);
+ page = (Page) BufferGetPage(buffer);
+
+ Assert(GinPageIsData(page));
+
+ /* Check that the tree has the same height in all branches */
+ if (GinPageIsLeaf(page))
+ {
+ ItemPointerData minItem;
+ int nlist;
+ ItemPointerData *list;
+ char tidrange_buf[MAXPGPATH];
+
+ ItemPointerSetMin(&minItem);
+
+ elog(DEBUG1, "page blk: %u, type leaf", stack->blkno);
+
+ if (leafdepth == -1)
+ leafdepth = stack->depth;
+ else if (stack->depth != leafdepth)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(rel), stack->blkno)));
+ list = GinDataLeafPageGetItems(page, &nlist, minItem);
+
+ if (nlist > 0)
+ snprintf(tidrange_buf, sizeof(tidrange_buf),
+ "%d tids (%u, %u) - (%u, %u)",
+ nlist,
+ ItemPointerGetBlockNumberNoCheck(&list[0]),
+ ItemPointerGetOffsetNumberNoCheck(&list[0]),
+ ItemPointerGetBlockNumberNoCheck(&list[nlist - 1]),
+ ItemPointerGetOffsetNumberNoCheck(&list[nlist - 1]));
+ else
+ snprintf(tidrange_buf, sizeof(tidrange_buf), "0 tids");
+
+ if (stack->parentblk != InvalidBlockNumber)
+ elog(DEBUG3, "blk %u: parent %u highkey (%u, %u), %s",
+ stack->blkno,
+ stack->parentblk,
+ ItemPointerGetBlockNumberNoCheck(&stack->parentkey),
+ ItemPointerGetOffsetNumberNoCheck(&stack->parentkey),
+ tidrange_buf);
+ else
+ elog(DEBUG3, "blk %u: root leaf, %s",
+ stack->blkno,
+ tidrange_buf);
+
+ if (stack->parentblk != InvalidBlockNumber &&
+ ItemPointerGetOffsetNumberNoCheck(&stack->parentkey) != InvalidOffsetNumber &&
+ nlist > 0 && ItemPointerCompare(&stack->parentkey, &list[nlist - 1]) < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": tid exceeds parent's high key in postingTree leaf on block %u",
+ RelationGetRelationName(rel), stack->blkno)));
+ }
+ else
+ {
+ LocationIndex pd_lower;
+ ItemPointerData bound;
+ int lowersize;
+
+ /*
+ * Check that tuples in each page are properly ordered and
+ * consistent with parent high key
+ */
+ maxoff = GinPageGetOpaque(page)->maxoff;
+ rightlink = GinPageGetOpaque(page)->rightlink;
+
+ elog(DEBUG1, "page blk: %u, type data, maxoff %d", stack->blkno, maxoff);
+
+ if (stack->parentblk != InvalidBlockNumber)
+ elog(DEBUG3, "blk %u: internal posting tree page with %u items, parent %u highkey (%u, %u)",
+ stack->blkno, maxoff, stack->parentblk,
+ ItemPointerGetBlockNumberNoCheck(&stack->parentkey),
+ ItemPointerGetOffsetNumberNoCheck(&stack->parentkey));
+ else
+ elog(DEBUG3, "blk %u: root internal posting tree page with %u items",
+ stack->blkno, maxoff);
+
+ /*
+ * A GIN posting tree internal page stores PostingItems in the
+ * 'lower' part of the page. The 'upper' part is unused. The
+ * number of elements is stored in the opaque area (maxoff). Make
+ * sure the size of the 'lower' part agrees with 'maxoff'
+ *
+ * We didn't set pd_lower until PostgreSQL version 9.4, so if this
+ * check fails, it could also be because the index was
+ * binary-upgraded from an earlier version. That was a long time
+ * ago, though, so complain if it doesn't match.
+ */
+ pd_lower = ((PageHeader) page)->pd_lower;
+ lowersize = pd_lower - MAXALIGN(SizeOfPageHeaderData);
+ if ((lowersize - MAXALIGN(sizeof(ItemPointerData))) / sizeof(PostingItem) != maxoff)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has unexpected pd_lower %u in posting tree block %u with maxoff %u",
+ RelationGetRelationName(rel), pd_lower, stack->blkno, maxoff)));
+
+ /*
+ * Before the PostingItems, there's one ItemPointerData in the
+ * 'lower' part that stores the page's high key.
+ */
+ bound = *GinDataPageGetRightBound(page);
+
+ /*
+ * A GIN page's right bound has a sane value only when the page is
+ * not the rightmost one at its level: the rightmost page does not
+ * store the high key explicitly, and its value is infinity.
+ */
+ if (ItemPointerIsValid(&stack->parentkey) &&
+ rightlink != InvalidBlockNumber &&
+ !ItemPointerEquals(&stack->parentkey, &bound))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": posting tree page's high key (%u, %u) doesn't match the downlink on block %u (parent blk %u, key (%u, %u))",
+ RelationGetRelationName(rel),
+ ItemPointerGetBlockNumberNoCheck(&bound),
+ ItemPointerGetOffsetNumberNoCheck(&bound),
+ stack->blkno, stack->parentblk,
+ ItemPointerGetBlockNumberNoCheck(&stack->parentkey),
+ ItemPointerGetOffsetNumberNoCheck(&stack->parentkey))));
+
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ GinPostingTreeScanItem *ptr;
+ PostingItem *posting_item = GinDataPageGetPostingItem(page, i);
+
+ /* ItemPointerGetOffsetNumber expects a valid pointer */
+ if (!(i == maxoff &&
+ rightlink == InvalidBlockNumber))
+ elog(DEBUG3, "key (%u, %u) -> %u",
+ ItemPointerGetBlockNumber(&posting_item->key),
+ ItemPointerGetOffsetNumber(&posting_item->key),
+ BlockIdGetBlockNumber(&posting_item->child_blkno));
+ else
+ elog(DEBUG3, "key (%u, %u) -> %u",
+ 0, 0, BlockIdGetBlockNumber(&posting_item->child_blkno));
+
+ if (i == maxoff && rightlink == InvalidBlockNumber)
+ {
+ /*
+ * The rightmost item in the tree level has (0, 0) as the
+ * key
+ */
+ if (ItemPointerGetBlockNumberNoCheck(&posting_item->key) != 0 ||
+ ItemPointerGetOffsetNumberNoCheck(&posting_item->key) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": rightmost posting tree page (blk %u) has unexpected last key (%u, %u)",
+ RelationGetRelationName(rel),
+ stack->blkno,
+ ItemPointerGetBlockNumberNoCheck(&posting_item->key),
+ ItemPointerGetOffsetNumberNoCheck(&posting_item->key))));
+ }
+ else if (i != FirstOffsetNumber)
+ {
+ PostingItem *previous_posting_item = GinDataPageGetPostingItem(page, i - 1);
+
+ if (ItemPointerCompare(&posting_item->key, &previous_posting_item->key) < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has wrong tuple order in posting tree, block %u, offset %u",
+ RelationGetRelationName(rel), stack->blkno, i)));
+ }
+
+ /*
+ * Check if this tuple is consistent with the downlink in the
+ * parent.
+ */
+ if (stack->parentblk != InvalidBlockNumber && i == maxoff &&
+ ItemPointerCompare(&stack->parentkey, &posting_item->key) < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": posting item exceeds parent's high key in postingTree internal page on block %u offset %u",
+ RelationGetRelationName(rel),
+ stack->blkno, i)));
+
+ /* This is an internal page, recurse into the child. */
+ ptr = (GinPostingTreeScanItem *) palloc(sizeof(GinPostingTreeScanItem));
+ ptr->depth = stack->depth + 1;
+
+ /*
+ * Set rightmost parent key to invalid item pointer. Its
+ * value is 'Infinity' and not explicitly stored.
+ */
+ if (rightlink == InvalidBlockNumber)
+ ItemPointerSetInvalid(&ptr->parentkey);
+ else
+ ptr->parentkey = posting_item->key;
+
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = BlockIdGetBlockNumber(&posting_item->child_blkno);
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ }
+ LockBuffer(buffer, GIN_UNLOCK);
+ ReleaseBuffer(buffer);
+
+ /* Pop the next item off the depth-first scan stack */
+ stack_next = stack->next;
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+}
+
+/*
+ * Main entry point for GIN checks.
+ *
+ * Allocates memory context and scans through the whole GIN graph.
+ */
+static void
+gin_check_parent_keys_consistency(Relation rel,
+ Relation heaprel,
+ void *callback_state,
+ bool readonly)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GinScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+ GinState state;
+ int leafdepth;
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "amcheck consistency check context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+ initGinState(&state, rel);
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ leafdepth = -1;
+
+ /* Start the scan at the root page */
+ stack = (GinScanItem *) palloc0(sizeof(GinScanItem));
+ stack->depth = 0;
+ stack->parenttup = NULL;
+ stack->parentblk = InvalidBlockNumber;
+ stack->parentlsn = InvalidXLogRecPtr;
+ stack->blkno = GIN_ROOT_BLKNO;
+
+ while (stack)
+ {
+ GinScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ OffsetNumber i,
+ maxoff,
+ prev_attnum;
+ XLogRecPtr lsn;
+ IndexTuple prev_tuple;
+ BlockNumber rightlink;
+
+ CHECK_FOR_INTERRUPTS();
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIN_SHARE);
+ page = (Page) BufferGetPage(buffer);
+ lsn = BufferGetLSNAtomic(buffer);
+ maxoff = PageGetMaxOffsetNumber(page);
+ rightlink = GinPageGetOpaque(page)->rightlink;
+
+ /* Do basic sanity checks on the page headers */
+ check_index_page(rel, buffer, stack->blkno);
+
+ elog(DEBUG3, "processing entry tree page at blk %u, maxoff: %u", stack->blkno, maxoff);
+
+ /*
+ * It's possible that the page was split since we looked at the
+ * parent, so that we might have missed the downlink of the right
+ * sibling when we scanned the parent. If so, add the right sibling
+ * to the stack now.
+ */
+ if (stack->parenttup != NULL)
+ {
+ GinNullCategory parent_key_category;
+ Datum parent_key = gintuple_get_key(&state,
+ stack->parenttup,
+ &parent_key_category);
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno,
+ page, maxoff);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+ OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple);
+ GinNullCategory page_max_key_category;
+ Datum page_max_key = gintuple_get_key(&state, idxtuple, &page_max_key_category);
+
+ if (rightlink != InvalidBlockNumber &&
+ ginCompareEntries(&state, attnum, page_max_key,
+ page_max_key_category, parent_key,
+ parent_key_category) > 0)
+ {
+ /* split page detected, install right link to the stack */
+ GinScanItem *ptr;
+
+ elog(DEBUG3, "split detected for blk: %u, parent blk: %u", stack->blkno, stack->parentblk);
+
+ ptr = (GinScanItem *) palloc(sizeof(GinScanItem));
+ ptr->depth = stack->depth;
+ ptr->parenttup = CopyIndexTuple(stack->parenttup);
+ ptr->parentblk = stack->parentblk;
+ ptr->parentlsn = stack->parentlsn;
+ ptr->blkno = rightlink;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ }
+
+ /* Check that the tree has the same height in all branches */
+ if (GinPageIsLeaf(page))
+ {
+ if (leafdepth == -1)
+ leafdepth = stack->depth;
+ else if (stack->depth != leafdepth)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(rel), stack->blkno)));
+ }
+
+ /*
+ * Check that tuples in each page are properly ordered and consistent
+ * with parent high key
+ */
+ prev_tuple = NULL;
+ prev_attnum = InvalidAttrNumber;
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+ OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple);
+ GinNullCategory prev_key_category;
+ Datum prev_key;
+ GinNullCategory current_key_category;
+ Datum current_key;
+
+ if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+ RelationGetRelationName(rel), stack->blkno, i)));
+
+ current_key = gintuple_get_key(&state, idxtuple, ¤t_key_category);
+
+ /*
+ * First block is metadata, skip order check. Also, never check
+ * for high key on rightmost page, as this key is not really
+ * stored explicitly.
+ *
+ * Also make sure to not compare entries for different attnums, which
+ * may be stored on the same page.
+ */
+ if (i != FirstOffsetNumber && attnum == prev_attnum && stack->blkno != GIN_ROOT_BLKNO &&
+ !(i == maxoff && rightlink == InvalidBlockNumber))
+ {
+ prev_key = gintuple_get_key(&state, prev_tuple, &prev_key_category);
+ if (ginCompareEntries(&state, attnum, prev_key,
+ prev_key_category, current_key,
+ current_key_category) >= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has wrong tuple order on entry tree page, block %u, offset %u, rightlink %u",
+ RelationGetRelationName(rel), stack->blkno, i, rightlink)));
+ }
+
+ /*
+ * Check if this tuple is consistent with the downlink in the
+ * parent.
+ */
+ if (stack->parenttup &&
+ i == maxoff)
+ {
+ GinNullCategory parent_key_category;
+ Datum parent_key = gintuple_get_key(&state,
+ stack->parenttup,
+ &parent_key_category);
+
+ if (ginCompareEntries(&state, attnum, current_key,
+ current_key_category, parent_key,
+ parent_key_category) > 0)
+ {
+ /*
+ * There was a discrepancy between parent and child
+ * tuples. We need to verify it is not a result of a
+ * concurrent page update. So, lock the parent and try
+ * to find the downlink for the current page. It may be
+ * missing due to a concurrent page split; this is OK.
+ */
+ pfree(stack->parenttup);
+ stack->parenttup = gin_refind_parent(rel, stack->parentblk,
+ stack->blkno, strategy);
+
+ /* If the downlink is missing, it was a concurrent split; else recheck */
+ if (!stack->parenttup)
+ elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split",
+ stack->blkno, stack->parentblk);
+ else
+ {
+ parent_key = gintuple_get_key(&state,
+ stack->parenttup,
+ &parent_key_category);
+
+ /*
+ * Check if it is properly adjusted. If so,
+ * proceed to the next key.
+ */
+ if (ginCompareEntries(&state, attnum, current_key,
+ current_key_category, parent_key,
+ parent_key_category) > 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+ RelationGetRelationName(rel), stack->blkno, i)));
+ }
+ }
+ }
+
+ /* If this is an internal page, recurse into the child */
+ if (!GinPageIsLeaf(page))
+ {
+ GinScanItem *ptr;
+
+ ptr = (GinScanItem *) palloc(sizeof(GinScanItem));
+ ptr->depth = stack->depth + 1;
+ /* last tuple in layer has no high key */
+ if (i != maxoff && !GinPageGetOpaque(page)->rightlink)
+ ptr->parenttup = CopyIndexTuple(idxtuple);
+ else
+ ptr->parenttup = NULL;
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = GinGetDownlink(idxtuple);
+ ptr->parentlsn = lsn;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ /* If this item is a pointer to a posting tree, recurse into it */
+ else if (GinIsPostingTree(idxtuple))
+ {
+ BlockNumber rootPostingTree = GinGetPostingTree(idxtuple);
+
+ gin_check_posting_tree_parent_keys_consistency(rel, rootPostingTree);
+ }
+ else
+ {
+ ItemPointer ipd;
+ int nipd;
+
+ ipd = ginReadTupleWithoutState(idxtuple, &nipd);
+
+ for (int j = 0; j < nipd; j++)
+ {
+ if (!OffsetNumberIsValid(ItemPointerGetOffsetNumber(&ipd[j])))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": posting list contains invalid heap pointer on block %u",
+ RelationGetRelationName(rel), stack->blkno)));
+ }
+ pfree(ipd);
+ }
+
+ prev_tuple = CopyIndexTuple(idxtuple);
+ prev_attnum = attnum;
+ }
+
+ LockBuffer(buffer, GIN_UNLOCK);
+ ReleaseBuffer(buffer);
+
+ /* Pop the next item off the depth-first scan stack */
+ stack_next = stack->next;
+ if (stack->parenttup)
+ pfree(stack->parenttup);
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+}
+
+/*
+ * Verify that a freshly-read page looks sane.
+ */
+static void
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+ Page page = BufferGetPage(buffer);
+
+ /*
+ * ReadBuffer verifies that every newly-read page passes
+ * PageHeaderIsValid, which means it either contains a reasonably sane
+ * page header or is all-zero. We have to defend against the all-zero
+ * case, however.
+ */
+ if (PageIsNew(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" contains unexpected zero page at block %u",
+ RelationGetRelationName(rel),
+ BufferGetBlockNumber(buffer)),
+ errhint("Please REINDEX it.")));
+
+ /*
+ * Additionally check that the special area looks sane.
+ */
+ if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GinPageOpaqueData)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" contains corrupted page at block %u",
+ RelationGetRelationName(rel),
+ BufferGetBlockNumber(buffer)),
+ errhint("Please REINDEX it.")));
+
+ if (GinPageIsDeleted(page))
+ {
+ if (!GinPageIsLeaf(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted internal page %d",
+ RelationGetRelationName(rel), blockNo)));
+ if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted page %d with tuples",
+ RelationGetRelationName(rel), blockNo)));
+ }
+ else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has page %d with exceeding count of tuples",
+ RelationGetRelationName(rel), blockNo)));
+}
+
+/*
+ * Try to re-find downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gin_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno, BufferAccessStrategy strategy)
+{
+ Buffer parentbuf;
+ Page parentpage;
+ OffsetNumber o,
+ parent_maxoff;
+ IndexTuple result = NULL;
+
+ parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+ strategy);
+
+ LockBuffer(parentbuf, GIN_SHARE);
+ parentpage = BufferGetPage(parentbuf);
+
+ if (GinPageIsLeaf(parentpage))
+ {
+ UnlockReleaseBuffer(parentbuf);
+ return result;
+ }
+
+ parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+ for (o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o))
+ {
+ ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o);
+ IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+ if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+ {
+ /* Found it! Make copy and return it */
+ result = CopyIndexTuple(itup);
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(parentbuf);
+
+ return result;
+}
+
+static ItemId
+PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+ OffsetNumber offset)
+{
+ ItemId itemid = PageGetItemId(page, offset);
+
+ if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
+ BLCKSZ - MAXALIGN(sizeof(GinPageOpaqueData)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("line pointer points past end of tuple space in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ /*
+ * Verify that line pointer isn't LP_REDIRECT or LP_UNUSED or LP_DEAD,
+ * since GIN never uses all three. Verify that line pointer has storage,
+ * too.
+ */
+ if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
+ ItemIdIsDead(itemid) || ItemIdGetLength(itemid) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("invalid line pointer storage in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ return itemid;
+}
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index d56eb7637d39..f11c43a0ed79 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -30,6 +30,7 @@
#include "access/tableam.h"
#include "access/transam.h"
#include "access/xact.h"
+#include "verify_common.h"
#include "catalog/index.h"
#include "catalog/pg_am.h"
#include "catalog/pg_opfamily_d.h"
@@ -159,14 +160,22 @@ typedef struct BtreeLastVisibleEntry
ItemPointer tid; /* Heap tid */
} BtreeLastVisibleEntry;
+/*
+ * arguments for the bt_index_check_callback callback
+ */
+typedef struct BTCallbackState
+{
+ bool parentcheck;
+ bool heapallindexed;
+ bool rootdescend;
+ bool checkunique;
+} BTCallbackState;
+
PG_FUNCTION_INFO_V1(bt_index_check);
PG_FUNCTION_INFO_V1(bt_index_parent_check);
-static void bt_index_check_internal(Oid indrelid, bool parentcheck,
- bool heapallindexed, bool rootdescend,
- bool checkunique);
-static inline void btree_index_checkable(Relation rel);
-static inline bool btree_index_mainfork_expected(Relation rel);
+static void bt_index_check_callback(Relation indrel, Relation heaprel,
+ void *state, bool readonly);
static void bt_check_every_level(Relation rel, Relation heaprel,
bool heapkeyspace, bool readonly, bool heapallindexed,
bool rootdescend, bool checkunique);
@@ -241,15 +250,21 @@ Datum
bt_index_check(PG_FUNCTION_ARGS)
{
Oid indrelid = PG_GETARG_OID(0);
- bool heapallindexed = false;
- bool checkunique = false;
+ BTCallbackState args;
+
+ args.heapallindexed = false;
+ args.rootdescend = false;
+ args.parentcheck = false;
+ args.checkunique = false;
if (PG_NARGS() >= 2)
- heapallindexed = PG_GETARG_BOOL(1);
- if (PG_NARGS() == 3)
- checkunique = PG_GETARG_BOOL(2);
+ args.heapallindexed = PG_GETARG_BOOL(1);
+ if (PG_NARGS() >= 3)
+ args.checkunique = PG_GETARG_BOOL(2);
- bt_index_check_internal(indrelid, false, heapallindexed, false, checkunique);
+ amcheck_lock_relation_and_check(indrelid, BTREE_AM_OID,
+ bt_index_check_callback,
+ AccessShareLock, &args);
PG_RETURN_VOID();
}
@@ -267,18 +282,23 @@ Datum
bt_index_parent_check(PG_FUNCTION_ARGS)
{
Oid indrelid = PG_GETARG_OID(0);
- bool heapallindexed = false;
- bool rootdescend = false;
- bool checkunique = false;
+ BTCallbackState args;
+
+ args.heapallindexed = false;
+ args.rootdescend = false;
+ args.parentcheck = true;
+ args.checkunique = false;
if (PG_NARGS() >= 2)
- heapallindexed = PG_GETARG_BOOL(1);
+ args.heapallindexed = PG_GETARG_BOOL(1);
if (PG_NARGS() >= 3)
- rootdescend = PG_GETARG_BOOL(2);
- if (PG_NARGS() == 4)
- checkunique = PG_GETARG_BOOL(3);
+ args.rootdescend = PG_GETARG_BOOL(2);
+ if (PG_NARGS() >= 4)
+ args.checkunique = PG_GETARG_BOOL(3);
- bt_index_check_internal(indrelid, true, heapallindexed, rootdescend, checkunique);
+ amcheck_lock_relation_and_check(indrelid, BTREE_AM_OID,
+ bt_index_check_callback,
+ ShareLock, &args);
PG_RETURN_VOID();
}
@@ -287,193 +307,46 @@ bt_index_parent_check(PG_FUNCTION_ARGS)
* Helper for bt_index_[parent_]check, coordinating the bulk of the work.
*/
static void
-bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed,
- bool rootdescend, bool checkunique)
+bt_index_check_callback(Relation indrel, Relation heaprel, void *state, bool readonly)
{
- Oid heapid;
- Relation indrel;
- Relation heaprel;
- LOCKMODE lockmode;
- Oid save_userid;
- int save_sec_context;
- int save_nestlevel;
-
- if (parentcheck)
- lockmode = ShareLock;
- else
- lockmode = AccessShareLock;
-
- /*
- * We must lock table before index to avoid deadlocks. However, if the
- * passed indrelid isn't an index then IndexGetRelation() will fail.
- * Rather than emitting a not-very-helpful error message, postpone
- * complaining, expecting that the is-it-an-index test below will fail.
- *
- * In hot standby mode this will raise an error when parentcheck is true.
- */
- heapid = IndexGetRelation(indrelid, true);
- if (OidIsValid(heapid))
- {
- heaprel = table_open(heapid, lockmode);
-
- /*
- * Switch to the table owner's userid, so that any index functions are
- * run as that user. Also lock down security-restricted operations
- * and arrange to make GUC variable changes local to this command.
- */
- GetUserIdAndSecContext(&save_userid, &save_sec_context);
- SetUserIdAndSecContext(heaprel->rd_rel->relowner,
- save_sec_context | SECURITY_RESTRICTED_OPERATION);
- save_nestlevel = NewGUCNestLevel();
- RestrictSearchPath();
- }
- else
- {
- heaprel = NULL;
- /* Set these just to suppress "uninitialized variable" warnings */
- save_userid = InvalidOid;
- save_sec_context = -1;
- save_nestlevel = -1;
- }
+ BTCallbackState *args = (BTCallbackState *) state;
+ bool heapkeyspace,
+ allequalimage;
- /*
- * Open the target index relations separately (like relation_openrv(), but
- * with heap relation locked first to prevent deadlocking). In hot
- * standby mode this will raise an error when parentcheck is true.
- *
- * There is no need for the usual indcheckxmin usability horizon test
- * here, even in the heapallindexed case, because index undergoing
- * verification only needs to have entries for a new transaction snapshot.
- * (If this is a parentcheck verification, there is no question about
- * committed or recently dead heap tuples lacking index entries due to
- * concurrent activity.)
- */
- indrel = index_open(indrelid, lockmode);
-
- /*
- * Since we did the IndexGetRelation call above without any lock, it's
- * barely possible that a race against an index drop/recreation could have
- * netted us the wrong table.
- */
- if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false))
+ if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM))
ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_TABLE),
- errmsg("could not open parent table of index \"%s\"",
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" lacks a main relation fork",
RelationGetRelationName(indrel))));
- /* Relation suitable for checking as B-Tree? */
- btree_index_checkable(indrel);
-
- if (btree_index_mainfork_expected(indrel))
+ /* Extract metadata from metapage, and sanitize it in passing */
+ _bt_metaversion(indrel, &heapkeyspace, &allequalimage);
+ if (allequalimage && !heapkeyspace)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version",
+ RelationGetRelationName(indrel))));
+ if (allequalimage && !_bt_allequalimage(indrel, false))
{
- bool heapkeyspace,
- allequalimage;
+ bool has_interval_ops = false;
- if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM))
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("index \"%s\" lacks a main relation fork",
- RelationGetRelationName(indrel))));
-
- /* Extract metadata from metapage, and sanitize it in passing */
- _bt_metaversion(indrel, &heapkeyspace, &allequalimage);
- if (allequalimage && !heapkeyspace)
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version",
- RelationGetRelationName(indrel))));
- if (allequalimage && !_bt_allequalimage(indrel, false))
- {
- bool has_interval_ops = false;
-
- for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(indrel); i++)
- if (indrel->rd_opfamily[i] == INTERVAL_BTREE_FAM_OID)
- has_interval_ops = true;
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe",
- RelationGetRelationName(indrel)),
- has_interval_ops
- ? errhint("This is known of \"interval\" indexes last built on a version predating 2023-11.")
- : 0));
- }
-
- /* Check index, possibly against table it is an index on */
- bt_check_every_level(indrel, heaprel, heapkeyspace, parentcheck,
- heapallindexed, rootdescend, checkunique);
+ for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(indrel); i++)
+ if (indrel->rd_opfamily[i] == INTERVAL_BTREE_FAM_OID)
+ {
+ has_interval_ops = true;
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe",
+ RelationGetRelationName(indrel)),
+ has_interval_ops
+ ? errhint("This is known of \"interval\" indexes last built on a version predating 2023-11.")
+ : 0));
+ }
}
- /* Roll back any GUC changes executed by index functions */
- AtEOXact_GUC(false, save_nestlevel);
-
- /* Restore userid and security context */
- SetUserIdAndSecContext(save_userid, save_sec_context);
-
- /*
- * Release locks early. That's ok here because nothing in the called
- * routines will trigger shared cache invalidations to be sent, so we can
- * relax the usual pattern of only releasing locks after commit.
- */
- index_close(indrel, lockmode);
- if (heaprel)
- table_close(heaprel, lockmode);
-}
-
-/*
- * Basic checks about the suitability of a relation for checking as a B-Tree
- * index.
- *
- * NB: Intentionally not checking permissions, the function is normally not
- * callable by non-superusers. If granted, it's useful to be able to check a
- * whole cluster.
- */
-static inline void
-btree_index_checkable(Relation rel)
-{
- if (rel->rd_rel->relkind != RELKIND_INDEX ||
- rel->rd_rel->relam != BTREE_AM_OID)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("only B-Tree indexes are supported as targets for verification"),
- errdetail("Relation \"%s\" is not a B-Tree index.",
- RelationGetRelationName(rel))));
-
- if (RELATION_IS_OTHER_TEMP(rel))
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("cannot access temporary tables of other sessions"),
- errdetail("Index \"%s\" is associated with temporary relation.",
- RelationGetRelationName(rel))));
-
- if (!rel->rd_index->indisvalid)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("cannot check index \"%s\"",
- RelationGetRelationName(rel)),
- errdetail("Index is not valid.")));
-}
-
-/*
- * Check if B-Tree index relation should have a file for its main relation
- * fork. Verification uses this to skip unlogged indexes when in hot standby
- * mode, where there is simply nothing to verify. We behave as if the
- * relation is empty.
- *
- * NB: Caller should call btree_index_checkable() before calling here.
- */
-static inline bool
-btree_index_mainfork_expected(Relation rel)
-{
- if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED ||
- !RecoveryInProgress())
- return true;
-
- ereport(DEBUG1,
- (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
- errmsg("cannot verify unlogged index \"%s\" during recovery, skipping",
- RelationGetRelationName(rel))));
-
- return false;
+ /* Check index, possibly against table it is an index on */
+ bt_check_every_level(indrel, heaprel, heapkeyspace, readonly,
+ args->heapallindexed, args->rootdescend, args->checkunique);
}
/*
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index a12aa3abf01a..98f836e15e79 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -188,6 +188,26 @@ ORDER BY c.relpages DESC LIMIT 10;
+
+
+
+ gin_index_check(index regclass) returns void
+
+ gin_index_check
+
+
+
+
+
+ gin_index_check tests that its target GIN index
+ has consistent parent-child tuple relations (no parent tuples
+ require tuple adjustment) and that the page graph respects balanced-tree
+ invariants (internal pages reference only leaf pages or only internal
+ pages).
+
+
+
+
diff --git a/src/backend/access/gin/README b/src/backend/access/gin/README
index b08073162128..742bcbad499f 100644
--- a/src/backend/access/gin/README
+++ b/src/backend/access/gin/README
@@ -237,10 +237,10 @@ GIN packs keys and downlinks into tuples in a different way.
P_i is grouped with K_{i+1}. -Inf key is not needed.
-There are couple of additional notes regarding K_{n+1} key.
-1) In entry tree rightmost page, a key coupled with P_n doesn't really matter.
+There are a couple of additional notes regarding K_{n+1} key.
+1) In the entry tree on the rightmost page, a key coupled with P_n doesn't really matter.
Highkey is assumed to be infinity.
-2) In posting tree, a key coupled with P_n always doesn't matter. Highkey for
+2) In the posting tree, a key coupled with P_n always doesn't matter. Highkey for
non-rightmost pages is stored separately and accessed via
GinDataPageGetRightBound().
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 1279b69422a5..b66cecd87991 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -194,6 +194,7 @@ BOOLEAN
BOX
BTArrayKeyInfo
BTBuildState
+BTCallbackState
BTCycleId
BTDedupInterval
BTDedupState
@@ -1052,8 +1053,10 @@ GinPageOpaque
GinPageOpaqueData
GinPlaceToPageRC
GinPostingList
+GinPostingTreeScanItem
GinQualCounts
GinScanEntry
+GinScanItem
GinScanKey
GinScanOpaque
GinScanOpaqueData