diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile index 5e9002d25018..1b7a63cbaa40 100644 --- a/contrib/amcheck/Makefile +++ b/contrib/amcheck/Makefile @@ -3,14 +3,17 @@ MODULE_big = amcheck OBJS = \ $(WIN32RES) \ + verify_common.o \ + verify_gin.o \ verify_heapam.o \ verify_nbtree.o EXTENSION = amcheck -DATA = amcheck--1.3--1.4.sql amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql +DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \ + amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql PGFILEDESC = "amcheck - function for verifying relation integrity" -REGRESS = check check_btree check_heap +REGRESS = check check_btree check_gin check_heap EXTRA_INSTALL = contrib/pg_walinspect TAP_TESTS = 1 diff --git a/contrib/amcheck/amcheck--1.4--1.5.sql b/contrib/amcheck/amcheck--1.4--1.5.sql new file mode 100644 index 000000000000..445c48ccb7d7 --- /dev/null +++ b/contrib/amcheck/amcheck--1.4--1.5.sql @@ -0,0 +1,14 @@ +/* contrib/amcheck/amcheck--1.4--1.5.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.5'" to load this file. 
\quit + + +-- gin_index_check() +-- +CREATE FUNCTION gin_index_check(index regclass) +RETURNS VOID +AS 'MODULE_PATHNAME', 'gin_index_check' +LANGUAGE C STRICT; + +REVOKE ALL ON FUNCTION gin_index_check(regclass) FROM PUBLIC; diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control index e67ace01c995..c8ba6d7c9bc3 100644 --- a/contrib/amcheck/amcheck.control +++ b/contrib/amcheck/amcheck.control @@ -1,5 +1,5 @@ # amcheck extension comment = 'functions for verifying relation integrity' -default_version = '1.4' +default_version = '1.5' module_pathname = '$libdir/amcheck' relocatable = true diff --git a/contrib/amcheck/expected/check_btree.out b/contrib/amcheck/expected/check_btree.out index e7fb5f551574..c6f4b16c5561 100644 --- a/contrib/amcheck/expected/check_btree.out +++ b/contrib/amcheck/expected/check_btree.out @@ -57,8 +57,8 @@ ERROR: could not open relation with OID 17 BEGIN; CREATE INDEX bttest_a_brin_idx ON bttest_a USING brin(id); SELECT bt_index_parent_check('bttest_a_brin_idx'); -ERROR: only B-Tree indexes are supported as targets for verification -DETAIL: Relation "bttest_a_brin_idx" is not a B-Tree index. +ERROR: expected "btree" index as targets for verification +DETAIL: Relation "bttest_a_brin_idx" is a brin index. 
ROLLBACK; -- normal check outside of xact SELECT bt_index_check('bttest_a_idx'); diff --git a/contrib/amcheck/expected/check_gin.out b/contrib/amcheck/expected/check_gin.out new file mode 100644 index 000000000000..93147de0ef11 --- /dev/null +++ b/contrib/amcheck/expected/check_gin.out @@ -0,0 +1,78 @@ +-- Test of index bulk load +SELECT setseed(1); + setseed +--------- + +(1 row) + +CREATE TABLE "gin_check"("Column1" int[]); +-- posting trees (frequently used entries) +INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000; +-- posting leaves (sparse entries) +INSERT INTO gin_check select array_agg(255 + round(random()*100)) from generate_series(1, 100) as i group by i % 100; +CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1"); +SELECT gin_index_check('gin_check_idx'); + gin_index_check +----------------- + +(1 row) + +-- cleanup +DROP TABLE gin_check; +-- Test index inserts +SELECT setseed(1); + setseed +--------- + +(1 row) + +CREATE TABLE "gin_check"("Column1" int[]); +CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1"); +ALTER INDEX gin_check_idx SET (fastupdate = false); +-- posting trees +INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000; +-- posting leaves +INSERT INTO gin_check select array_agg(100 + round(random()*255)) from generate_series(1, 100) as i group by i % 100; +SELECT gin_index_check('gin_check_idx'); + gin_index_check +----------------- + +(1 row) + +-- cleanup +DROP TABLE gin_check; +-- Test GIN over text array +SELECT setseed(1); + setseed +--------- + +(1 row) + +CREATE TABLE "gin_check_text_array"("Column1" text[]); +-- posting trees +INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300)::text)::text) from generate_series(1, 100000) as i group by i % 10000; +-- posting leaves +INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300 + 300)::text)::text) 
from generate_series(1, 10000) as i group by i % 100; +CREATE INDEX gin_check_text_array_idx on "gin_check_text_array" USING GIN("Column1"); +SELECT gin_index_check('gin_check_text_array_idx'); + gin_index_check +----------------- + +(1 row) + +-- cleanup +DROP TABLE gin_check_text_array; +-- Test GIN over jsonb +CREATE TABLE "gin_check_jsonb"("j" jsonb); +INSERT INTO gin_check_jsonb values ('{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}'); +INSERT INTO gin_check_jsonb values ('[[14,2,3]]'); +INSERT INTO gin_check_jsonb values ('[1,[14,2,3]]'); +CREATE INDEX "gin_check_jsonb_idx" on gin_check_jsonb USING GIN("j" jsonb_path_ops); +SELECT gin_index_check('gin_check_jsonb_idx'); + gin_index_check +----------------- + +(1 row) + +-- cleanup +DROP TABLE gin_check_jsonb; diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build index 61d7eaf2305d..b33e8c9b062f 100644 --- a/contrib/amcheck/meson.build +++ b/contrib/amcheck/meson.build @@ -1,6 +1,8 @@ # Copyright (c) 2022-2025, PostgreSQL Global Development Group amcheck_sources = files( + 'verify_common.c', + 'verify_gin.c', 'verify_heapam.c', 'verify_nbtree.c', ) @@ -24,6 +26,7 @@ install_data( 'amcheck--1.1--1.2.sql', 'amcheck--1.2--1.3.sql', 'amcheck--1.3--1.4.sql', + 'amcheck--1.4--1.5.sql', kwargs: contrib_data_args, ) @@ -35,6 +38,7 @@ tests += { 'sql': [ 'check', 'check_btree', + 'check_gin', 'check_heap', ], }, diff --git a/contrib/amcheck/sql/check_gin.sql b/contrib/amcheck/sql/check_gin.sql new file mode 100644 index 000000000000..92ddbbc7a891 --- /dev/null +++ b/contrib/amcheck/sql/check_gin.sql @@ -0,0 +1,52 @@ +-- Test of index bulk load +SELECT setseed(1); +CREATE TABLE "gin_check"("Column1" int[]); +-- posting trees (frequently used entries) +INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000; +-- posting leaves (sparse entries) +INSERT INTO gin_check select array_agg(255 + round(random()*100)) from generate_series(1, 100) as i group 
by i % 100; +CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1"); +SELECT gin_index_check('gin_check_idx'); + +-- cleanup +DROP TABLE gin_check; + +-- Test index inserts +SELECT setseed(1); +CREATE TABLE "gin_check"("Column1" int[]); +CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1"); +ALTER INDEX gin_check_idx SET (fastupdate = false); +-- posting trees +INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000; +-- posting leaves +INSERT INTO gin_check select array_agg(100 + round(random()*255)) from generate_series(1, 100) as i group by i % 100; + +SELECT gin_index_check('gin_check_idx'); + +-- cleanup +DROP TABLE gin_check; + +-- Test GIN over text array +SELECT setseed(1); +CREATE TABLE "gin_check_text_array"("Column1" text[]); +-- posting trees +INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300)::text)::text) from generate_series(1, 100000) as i group by i % 10000; +-- posting leaves +INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300 + 300)::text)::text) from generate_series(1, 10000) as i group by i % 100; +CREATE INDEX gin_check_text_array_idx on "gin_check_text_array" USING GIN("Column1"); +SELECT gin_index_check('gin_check_text_array_idx'); + +-- cleanup +DROP TABLE gin_check_text_array; + +-- Test GIN over jsonb +CREATE TABLE "gin_check_jsonb"("j" jsonb); +INSERT INTO gin_check_jsonb values ('{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}'); +INSERT INTO gin_check_jsonb values ('[[14,2,3]]'); +INSERT INTO gin_check_jsonb values ('[1,[14,2,3]]'); +CREATE INDEX "gin_check_jsonb_idx" on gin_check_jsonb USING GIN("j" jsonb_path_ops); + +SELECT gin_index_check('gin_check_jsonb_idx'); + +-- cleanup +DROP TABLE gin_check_jsonb; diff --git a/contrib/amcheck/t/002_cic.pl b/contrib/amcheck/t/002_cic.pl index 0b6a5a9e4641..6a0c4f611258 100644 --- a/contrib/amcheck/t/002_cic.pl +++ b/contrib/amcheck/t/002_cic.pl @@ -21,8 +21,9 @@ 'lock_timeout 
= ' . (1000 * $PostgreSQL::Test::Utils::timeout_default)); $node->start; $node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); -$node->safe_psql('postgres', q(CREATE TABLE tbl(i int))); +$node->safe_psql('postgres', q(CREATE TABLE tbl(i int, j jsonb))); $node->safe_psql('postgres', q(CREATE INDEX idx ON tbl(i))); +$node->safe_psql('postgres', q(CREATE INDEX ginidx ON tbl USING gin(j))); # # Stress CIC with pgbench. @@ -40,13 +41,13 @@ { '002_pgbench_concurrent_transaction' => q( BEGIN; - INSERT INTO tbl VALUES(0); + INSERT INTO tbl VALUES(0, '{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}'); COMMIT; ), '002_pgbench_concurrent_transaction_savepoints' => q( BEGIN; SAVEPOINT s1; - INSERT INTO tbl VALUES(0); + INSERT INTO tbl VALUES(0, '[[14,2,3]]'); COMMIT; ), '002_pgbench_concurrent_cic' => q( @@ -54,7 +55,10 @@ \if :gotlock DROP INDEX CONCURRENTLY idx; CREATE INDEX CONCURRENTLY idx ON tbl(i); + DROP INDEX CONCURRENTLY ginidx; + CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j); SELECT bt_index_check('idx',true); + SELECT gin_index_check('ginidx'); SELECT pg_advisory_unlock(42); \endif ) diff --git a/contrib/amcheck/t/003_cic_2pc.pl b/contrib/amcheck/t/003_cic_2pc.pl index 9134487f3b49..00a446a381fa 100644 --- a/contrib/amcheck/t/003_cic_2pc.pl +++ b/contrib/amcheck/t/003_cic_2pc.pl @@ -25,7 +25,7 @@ 'lock_timeout = ' . 
(1000 * $PostgreSQL::Test::Utils::timeout_default)); $node->start; $node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); -$node->safe_psql('postgres', q(CREATE TABLE tbl(i int))); +$node->safe_psql('postgres', q(CREATE TABLE tbl(i int, j jsonb))); # @@ -41,7 +41,7 @@ $main_h->query_safe( q( BEGIN; -INSERT INTO tbl VALUES(0); +INSERT INTO tbl VALUES(0, '[[14,2,3]]'); )); my $cic_h = $node->background_psql('postgres'); @@ -50,6 +50,7 @@ qr/start/, q( \echo start CREATE INDEX CONCURRENTLY idx ON tbl(i); +CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j); )); $main_h->query_safe( @@ -60,7 +61,7 @@ $main_h->query_safe( q( BEGIN; -INSERT INTO tbl VALUES(0); +INSERT INTO tbl VALUES(0, '[[14,2,3]]'); )); $node->safe_psql('postgres', q(COMMIT PREPARED 'a';)); @@ -69,7 +70,7 @@ q( PREPARE TRANSACTION 'b'; BEGIN; -INSERT INTO tbl VALUES(0); +INSERT INTO tbl VALUES(0, '"mary had a little lamb"'); )); $node->safe_psql('postgres', q(COMMIT PREPARED 'b';)); @@ -86,6 +87,9 @@ $result = $node->psql('postgres', q(SELECT bt_index_check('idx',true))); is($result, '0', 'bt_index_check after overlapping 2PC'); +$result = $node->psql('postgres', q(SELECT gin_index_check('ginidx'))); +is($result, '0', 'gin_index_check after overlapping 2PC'); + # # Server restart shall not change whether prepared xact blocks CIC @@ -94,7 +98,7 @@ $node->safe_psql( 'postgres', q( BEGIN; -INSERT INTO tbl VALUES(0); +INSERT INTO tbl VALUES(0, '{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}'); PREPARE TRANSACTION 'spans_restart'; BEGIN; CREATE TABLE unused (); @@ -108,12 +112,16 @@ \echo start DROP INDEX CONCURRENTLY idx; CREATE INDEX CONCURRENTLY idx ON tbl(i); +DROP INDEX CONCURRENTLY ginidx; +CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j); )); $node->safe_psql('postgres', "COMMIT PREPARED 'spans_restart'"); $reindex_h->quit; $result = $node->psql('postgres', q(SELECT bt_index_check('idx',true))); is($result, '0', 'bt_index_check after 2PC and restart'); +$result = $node->psql('postgres', q(SELECT 
gin_index_check('ginidx'))); +is($result, '0', 'gin_index_check after 2PC and restart'); # @@ -136,14 +144,14 @@ { '003_pgbench_concurrent_2pc' => q( BEGIN; - INSERT INTO tbl VALUES(0); + INSERT INTO tbl VALUES(0,'null'); PREPARE TRANSACTION 'c:client_id'; COMMIT PREPARED 'c:client_id'; ), '003_pgbench_concurrent_2pc_savepoint' => q( BEGIN; SAVEPOINT s1; - INSERT INTO tbl VALUES(0); + INSERT INTO tbl VALUES(0,'[false, "jnvaba", -76, 7, {"_": [1]}, 9]'); PREPARE TRANSACTION 'c:client_id'; COMMIT PREPARED 'c:client_id'; ), @@ -163,7 +171,25 @@ SELECT bt_index_check('idx',true); SELECT pg_advisory_unlock(42); \endif + ), + '005_pgbench_concurrent_cic' => q( + SELECT pg_try_advisory_lock(42)::integer AS gotginlock \gset + \if :gotginlock + DROP INDEX CONCURRENTLY ginidx; + CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j); + SELECT gin_index_check('ginidx'); + SELECT pg_advisory_unlock(42); + \endif + ), + '006_pgbench_concurrent_ric' => q( + SELECT pg_try_advisory_lock(42)::integer AS gotginlock \gset + \if :gotginlock + REINDEX INDEX CONCURRENTLY ginidx; + SELECT gin_index_check('ginidx'); + SELECT pg_advisory_unlock(42); + \endif ) + }); $node->stop; diff --git a/contrib/amcheck/t/006_gin_concurrency.pl b/contrib/amcheck/t/006_gin_concurrency.pl new file mode 100644 index 000000000000..afc67940d4dd --- /dev/null +++ b/contrib/amcheck/t/006_gin_concurrency.pl @@ -0,0 +1,196 @@ + +# Copyright (c) 2021-2025, PostgreSQL Global Development Group + +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More; + +my $node; + +# +# Test set-up +# +$node = PostgreSQL::Test::Cluster->new('test'); +$node->init; +$node->append_conf('postgresql.conf', + 'lock_timeout = ' . 
(1000 * $PostgreSQL::Test::Utils::timeout_default)); +$node->start; +$node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); +$node->safe_psql('postgres', q(CREATE TABLE tbl(i integer[], j jsonb, k jsonb))); +$node->safe_psql('postgres', q(CREATE INDEX ginidx ON tbl USING gin(i, j, k))); +$node->safe_psql('postgres', q(CREATE TABLE jsondata (i serial, j jsonb))); +$node->safe_psql('postgres', q(INSERT INTO jsondata (j) VALUES + ('1'), + ('91'), + ('[5]'), + ('true'), + ('"zxI"'), + ('[1, 7]'), + ('["", 4]'), + ('"utDFBz"'), + ('[[9], ""]'), + ('"eCvxKPML"'), + ('["1VMQNQM"]'), + ('{"": "562c"}'), + ('[58, 8, null]'), + ('{"": {"": 62}}'), + ('["", 6, 19, ""]'), + ('{"ddfWTQ": true}'), + ('["", 734.2, 9, 5]'), + ('"GMV27mjtuuqmlltw"'), + ('{"dabe": -5, "": 6}'), + ('"hgihykirQGIYTcCA30"'), + ('[9, {"Utrn": -6}, ""]'), + ('"BJTZUMST1_WWEgyqgka_"'), + ('["", -4, "", [-2], -47]'), + ('{"": [3], "": {"": "y"}}'), + ('{"myuijj": "YUWIUZXXLGS"}'), + ('{"3": false, "C": "1sHTX"}'), + ('"ZGUORVDE_ACF1QXJ_hipgwrks"'), + ('{"072": [3, -4], "oh": "eL"}'), + ('[{"de": 9, "JWHPMRZJW": [0]}]'), + ('"EACJUZEBAFFBEE6706SZLWVGO635"'), + ('["P", {"TZW": [""]}, {"": [0]}]'), + ('{"": -6, "YMb": -22, "__": [""]}'), + ('{"659": [8], "bfc": [0], "V": ""}'), + ('{"8776": "1tryl", "Q": 2, "": 4.6}'), + ('[[1], "", 9, 0, [1, 0], -1, 0, "C"]'), + ('"635321pnpjlfFzhGTIYP9265iA_19D8260"'), + ('"klmxsoCFDtzxrhotsqlnmvmzlcbdde34twj"'), + ('"GZSXSZVS19ecbe_ZJJED0379c1j9_GSU9167"'), + ('{"F18s": {"": -84194}, "ececab2": [""]}'), + ('["", {"SVAvgg": "Q"}, 1, 9, "gypy", [1]]'), + ('[[""], {"": 5}, "GVZGGVGSWM", 2, ["", 8]]'), + ('{"V": 8, "TPNL": [826, null], "4": -9.729}'), + ('{"HTJP_DAptxn6": 9, "": "r", "hji4124": ""}'), + ('[1, ["9", 5, 6, ""], {"": "", "": "efb"}, 7]'), + ('{"": 6, "1251e_cajrgkyzuxBEDM017444EFD": 548}'), + ('{"853": -60, "TGLUG_jxmrggv": null, "pjx": ""}'), + ('[0, "wsgnnvCfJVV_KOMLVXOUIS9FIQLPXXBbbaohjrpj"]'), + ('"nizvkl36908OLW22ecbdeEBMHMiCEEACcikwkjpmu30X_m"'), + 
('{"bD24eeVZWY": 1, "Bt": 9, "": 6052, "FT": ["h"]}'), + ('"CDBnouyzlAMSHJCtguxxizpzgkNYfaNLURVITNLYVPSNLYNy"'), + ('{"d": [[4, "N"], null, 6, true], "1PKV": 6, "9": 6}'), + ('[-7326, [83, 55], -63, [0, {"": 1}], {"ri0": false}]'), + ('{"": 117.38, "FCkx3608szztpvjolomzvlyrshyvrgz": -4.2}'), + ('["", 8, {"WXHNG": {"6": 4}}, [null], 7, 2, "", 299, 6]'), + ('[[-992.2, "TPm", "", "cedeff79BD8", "t", [1]], 0, [-7]]'), + ('[9, 34, ["LONuyiYGQZ"], [7, 88], ["c"], 1, 6, "", [[2]]]'), + ('[20, 5, null, "eLHTXRWNV", 8, ["pnpvrum", -3], "FINY", 3]'), + ('[{"": "", "b": 2, "d": "egu"}, "aPNK", 2, 9, {"": -79946}]'), + ('[1, {"769": 9}, 5, 9821, 22, 0, 2.7, 5, 4, 191, 54.599, 24]'), + ('["c", 77, "b_0lplvHJNLMxw", "VN76dhFadaafadfe5dfbco", false]'), + ('"TYIHXebbPK_86QMP_199bEEIS__8205986vdC_CFAEFBFCEFCJQRHYoqztv"'), + ('"cdmxxxzrhtxpwuyrxinmhb5577NSPHIHMTPQYTXSUVVGJPUUMCBEDb_1569e"'), + ('[[5, null, "C"], "ORNR", "mnCb", 1, -800, "6953", ["K", 0], ""]'), + ('"SSKLTHJxjxywwquhiwsde353eCIJJjkyvn9946c2cdVadcboiyZFAYMHJWGMMT"'), + ('"5185__D5AtvhizvmEVceF3jxtghlCF0789_owmsztJHRMOJ7rlowxqq51XLXJbF"'), + ('{"D": 565206, "xupqtmfedff": "ZGJN9", "9": 1, "glzv": -47, "": -8}'), + ('{"": 9, "": {"": [null], "ROP": 842}, "": ["5FFD", 7, 5, 1, 94, 1]}'), + ('{"JLn": ["8s"], "": "_ahxizrzhivyzvhr", "XSAt": 5, "P": 2838, "": 5}'), + ('[51, 3, {"": 9, "": -9, "": [[6]]}, 7, 7, {"": 0}, "TXLQL", 7.6, [7]]'), + ('[-38.7, "kre40", 5, {"": null}, "tvuv", 8, "", "", "uizygprwwvh", "1"]'), + ('"z934377_nxmzjnuqglgyukjteefeihjyot1irkvwnnrqinptlpzwjgmkjbQMUVxxwvbdz"'), + ('[165.9, "dAFD_60JQPYbafh", false, {"": 6, "": "fcfd"}, [[2], "c"], 4, 2]'), + ('"ffHOOPVSSACDqiyeecTNWJMWPNRXU283aHRXNUNZZZQPUGYSQTTQXQVJM5eeafcIPGIHcac"'), + ('[2, 8, -53, {"": 5}, "F9", 8, "SGUJPNVI", "7OLOZH", 9.84, {"": 6}, 207, 6]'), + ('"xqmqmyljhq__ZGWJVNefagsxrsktruhmlinhxloupuVQW0804901NKGGMNNSYYXWQOosz8938"'), + ('{"FEoLfaab1160167": {"L": [42, 0]}, "938": "FCCUPGYYYMQSQVZJKM", "knqmk": 2}'), + 
('"0igyurmOMSXIYHSZQEAcxlvgqdxkhwtrbaabfaaMC138Z_BDRLrythpi30_MPRXMTOILRLswmoy"'), + ('"1129BBCABFFAACA9VGVKipnwohaccc9TSIMTOQKHmcGYVeFE_PWKLHmpyj60137672qugtsstugg"'), + ('"D3BDA069074174vx48A37IVHWVXLUP9382542ypsl1465pixtryzCBgrkkhrvCC_BDDFatkyXHLIe"'), + ('[{"esx7": -53, "ec60834YGVMYoXAAvgxmmqnojyzmiklhdovFipl": 2, "os": 66433}, 9.13]'), + ('{"": ["", 4, null, 5, null], "": "3", "5_GMMHTIhPB_F_vsebc1": "Er", "GY": 121.32}'), + ('["krTVPYDEd", 5, 8, [6, -6], [[-9], 3340, [[""]]], "", 5, [6, true], 3, "", 1, ""]'), + ('{"rBNPKN8446080wruOLeceaCBDCKWNUYYMONSJUlCDFExr": {"": "EE0", "6826": 5, "": 7496}}'), + ('[3, {"": -8}, "101dboMVSNKZLVPITLHLPorwwuxxjmjsh", "", "LSQPRVYKWVYK945imrh", 4, 51]'), + ('[["HY6"], "", "bcdB", [2, [85, 1], 3, 3, 3, [8]], "", ["_m"], "2", -33, 8, 3, "_xwj"]'), + ('["", 0, -3.7, 8, false, null, {"": 5}, 9, "06FccxFcdb283bbZGGVRSMWLJH2_PBAFpwtkbceto"]'), + ('[52, "", -39, -7, [1], "c", {"": 9, "": 45528, "G": {"": 7}}, 3, false, 0, "EB", 8, -6]'), + ('"qzrkvrlG78CCCEBCptzwwok808805243QXVSYed3efZSKLSNXPxhrS357KJMWSKgrfcFFDFDWKSXJJSIJ_yqJu"'), + ('[43, 8, {"": ""}, "uwtv__HURKGJLGGPPW", 9, 66, "yqrvghxuw", {"J": false}, false, 2, 0, 4]'), + ('[{"UVL": 7, "": 1}, false, [6, "H"], "boxlgqgm", 3, "znhm", [true], 0, ["e", 3.7], 9, 9.4]'), + ('{"825634870117somzqw": 1, "": [5], "gYH": "_XT", "b22412631709RZP": 3, "": "", "FDB": [""]}'), + ('[8, ["_bae"], "", "WN", 80, {"o": 2, "aff": 16}, false, true, 4, 6, {"nutzkzikolsxZRQ": 30}]'), + ('["588BD9c_xzsn", {"k": 0, "_Ecezlkslrwvjpwrukiqzl": 3, "Ej": "4"}, "TUXwghn1dTNRXJZpswmD", 5]'), + ('[{"dC": 7}, {"": 1, "4": 41, "": "", "": "adKS"}, {"": "ypv"}, 6, 9, 2, [-61.46], [1, 3.9], 2]'), + ('{"8": 8, "": -364, "855": -238.1, "zj": 9, "SNHJG413": 3, "UMNVI73": [60, 0], "iwvqse": -1.833}'), + ('"VTUKMLZKQPHIEniCFZ_cjrhvspxzulvxhqykjzmrw89OGOGISWdcrvpOPLOFALGK809896999xzqnkm63254_xrmcfcedb"'), + ('["", "USNQbcexyFDCdBAFWJIphloxwytplyZZR008400FmoiYXVYOHVGV79795644463Aug_aeoDDEjzoziisxoykuijhz"]'), + 
('{"": 1, "5abB58gXVQVTTMWU3jSHXMMNV": "", "nv": 934, "kjsnhtj": 8, "": [{"xm": [71, 425]}], "": -9}'), + ('"__oliqCcbwwyqmtECsqivplcb1NTMOQRZTYRJONOIPWNHKWLJRIHKROMJNZLNGTTKRcedebccdbMTQXSzhynxmllqxuhnxBA_"'), + ('["thgACBWGNGMkFFEA", [0, -1349, {"18": "RM", "F3": 6, "dP": "_AF"}, 64, 0, {"f": [8]}], 5, [[0]], 2]') +)); + +# +# Stress gin with pgbench. +# +# Modify the table data, and hence the index data, from multiple process +# while from other processes run the index checking code. This should, +# if the index is large enough, result in the checks performing across +# concurrent page splits. +# +$node->pgbench( + '--no-vacuum --client=20 --transactions=5000', + 0, + [qr{actually processed}], + [qr{^$}], + 'concurrent DML and index checking', + { + '006_gin_concurrency_insert_1' => q( + INSERT INTO tbl (i, j, k) + (SELECT ARRAY[x.i, y.i, random(0,100000), random(0,100000)], x.j, y.j + FROM jsondata x, jsondata y + WHERE x.i = random(1,100) + AND y.i = random(1,100) + ) + ), + '006_gin_concurrency_insert_2' => q( + INSERT INTO tbl (i, j, k) + (SELECT gs.i, j.j, j.j || j.j + FROM jsondata j, + (SELECT array_agg(gs) AS i FROM generate_series(random(0,100), random(101,200)) gs) gs + WHERE j.i = random(1,100) + ) + ), + '006_gin_concurrency_insert_nulls' => q( + INSERT INTO tbl (i, j, k) VALUES + (null, null, null), + (null, null, '[]'), + (null, '[]', null), + (ARRAY[]::INTEGER[], null, null), + (null, '[]', '[]'), + (ARRAY[]::INTEGER[], '[]', null), + (ARRAY[]::INTEGER[], '[]', '[]') + ), + '006_gin_concurrency_update_i' => q( + UPDATE tbl + SET i = (SELECT i || i FROM tbl ORDER BY random() LIMIT 1) + WHERE j = (SELECT j FROM tbl ORDER BY random() LIMIT 1); + ), + '006_gin_concurrency_update_j' => q( + UPDATE tbl + SET j = (SELECT j || j FROM tbl ORDER BY random() LIMIT 1) + WHERE k = (SELECT k FROM tbl ORDER BY random() LIMIT 1); + ), + '006_gin_concurrency_update_k' => q( + UPDATE tbl + SET k = (SELECT k || k FROM tbl ORDER BY random() LIMIT 1) + WHERE i = 
(SELECT i FROM tbl ORDER BY random() LIMIT 1); + ), + '006_gin_concurrency_delete' => q( + DELETE FROM tbl + WHERE random(1,5) = 3; + ), + '006_gin_concurrency_gin_index_check' => q( + SELECT gin_index_check('ginidx'); + ) + }); + +$node->stop; +done_testing(); + diff --git a/contrib/amcheck/verify_common.c b/contrib/amcheck/verify_common.c new file mode 100644 index 000000000000..d095e62ce551 --- /dev/null +++ b/contrib/amcheck/verify_common.c @@ -0,0 +1,191 @@ +/*------------------------------------------------------------------------- + * + * verify_common.c + * Utility functions common to all access methods. + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/verify_common.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/table.h" +#include "access/tableam.h" +#include "verify_common.h" +#include "catalog/index.h" +#include "catalog/pg_am.h" +#include "commands/tablecmds.h" +#include "utils/guc.h" +#include "utils/syscache.h" + +static bool amcheck_index_mainfork_expected(Relation rel); + + +/* + * Check if index relation should have a file for its main relation fork. + * Verification uses this to skip unlogged indexes when in hot standby mode, + * where there is simply nothing to verify. + * + * NB: Caller should call index_checkable() before calling here. + */ +static bool +amcheck_index_mainfork_expected(Relation rel) +{ + if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED || + !RecoveryInProgress()) + return true; + + ereport(NOTICE, + (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), + errmsg("cannot verify unlogged index \"%s\" during recovery, skipping", + RelationGetRelationName(rel)))); + + return false; +} + +/* +* Amcheck main workhorse. +* Given index relation OID, lock relation. 
+* Next, take a number of standard actions: +* 1) Make sure the index can be checked +* 2) change the context of the user, +* 3) keep track of GUCs modified via index functions +* 4) execute callback function to verify integrity. +*/ +void +amcheck_lock_relation_and_check(Oid indrelid, + Oid am_id, + IndexDoCheckCallback check, + LOCKMODE lockmode, + void *state) +{ + Oid heapid; + Relation indrel; + Relation heaprel; + Oid save_userid; + int save_sec_context; + int save_nestlevel; + + /* + * We must lock table before index to avoid deadlocks. However, if the + * passed indrelid isn't an index then IndexGetRelation() will fail. + * Rather than emitting a not-very-helpful error message, postpone + * complaining, expecting that the is-it-an-index test below will fail. + * + * In hot standby mode this will raise an error when parentcheck is true. + */ + heapid = IndexGetRelation(indrelid, true); + if (OidIsValid(heapid)) + { + heaprel = table_open(heapid, lockmode); + + /* + * Switch to the table owner's userid, so that any index functions are + * run as that user. Also lock down security-restricted operations + * and arrange to make GUC variable changes local to this command. + */ + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(heaprel->rd_rel->relowner, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + } + else + { + heaprel = NULL; + /* Set these just to suppress "uninitialized variable" warnings */ + save_userid = InvalidOid; + save_sec_context = -1; + save_nestlevel = -1; + } + + /* + * Open the target index relations separately (like relation_openrv(), but + * with heap relation locked first to prevent deadlocking). In hot + * standby mode this will raise an error when parentcheck is true. 
+ * + * There is no need for the usual indcheckxmin usability horizon test + * here, even in the heapallindexed case, because index undergoing + * verification only needs to have entries for a new transaction snapshot. + * (If this is a parentcheck verification, there is no question about + * committed or recently dead heap tuples lacking index entries due to + * concurrent activity.) + */ + indrel = index_open(indrelid, lockmode); + + /* + * Since we did the IndexGetRelation call above without any lock, it's + * barely possible that a race against an index drop/recreation could have + * netted us the wrong table. + */ + if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("could not open parent table of index \"%s\"", + RelationGetRelationName(indrel)))); + + /* Check that relation suitable for checking */ + if (index_checkable(indrel, am_id)) + check(indrel, heaprel, state, lockmode == ShareLock); + + /* Roll back any GUC changes executed by index functions */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + + /* + * Release locks early. That's ok here because nothing in the called + * routines will trigger shared cache invalidations to be sent, so we can + * relax the usual pattern of only releasing locks after commit. + */ + index_close(indrel, lockmode); + if (heaprel) + table_close(heaprel, lockmode); +} + +/* + * Basic checks about the suitability of a relation for checking as an index. + * + * + * NB: Intentionally not checking permissions, the function is normally not + * callable by non-superusers. If granted, it's useful to be able to check a + * whole cluster. 
+ */ +bool +index_checkable(Relation rel, Oid am_id) +{ + if (rel->rd_rel->relkind != RELKIND_INDEX || + rel->rd_rel->relam != am_id) + { + HeapTuple amtup; + HeapTuple amtuprel; + + amtup = SearchSysCache1(AMOID, ObjectIdGetDatum(am_id)); + amtuprel = SearchSysCache1(AMOID, ObjectIdGetDatum(rel->rd_rel->relam)); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("expected \"%s\" index as targets for verification", NameStr(((Form_pg_am) GETSTRUCT(amtup))->amname)), + errdetail("Relation \"%s\" is a %s index.", + RelationGetRelationName(rel), NameStr(((Form_pg_am) GETSTRUCT(amtuprel))->amname)))); + } + + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"), + errdetail("Index \"%s\" is associated with temporary relation.", + RelationGetRelationName(rel)))); + + if (!rel->rd_index->indisvalid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot check index \"%s\"", + RelationGetRelationName(rel)), + errdetail("Index is not valid."))); + + return amcheck_index_mainfork_expected(rel); +} diff --git a/contrib/amcheck/verify_common.h b/contrib/amcheck/verify_common.h new file mode 100644 index 000000000000..b2565bfbbab8 --- /dev/null +++ b/contrib/amcheck/verify_common.h @@ -0,0 +1,31 @@ +/*------------------------------------------------------------------------- + * + * verify_common.h + * Shared routines for amcheck verifications. 
+ * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/verify_common.h + * + *------------------------------------------------------------------------- + */ +#include "storage/bufpage.h" +#include "storage/lmgr.h" +#include "storage/lockdefs.h" +#include "utils/relcache.h" +#include "miscadmin.h" + +/* Typedefs for callback functions for amcheck_lock_relation */ +typedef void (*IndexCheckableCallback) (Relation index); +typedef void (*IndexDoCheckCallback) (Relation rel, + Relation heaprel, + void *state, + bool readonly); + +extern void amcheck_lock_relation_and_check(Oid indrelid, + Oid am_id, + IndexDoCheckCallback check, + LOCKMODE lockmode, void *state); + +extern bool index_checkable(Relation rel, Oid am_id); diff --git a/contrib/amcheck/verify_gin.c b/contrib/amcheck/verify_gin.c new file mode 100644 index 000000000000..670f53637d47 --- /dev/null +++ b/contrib/amcheck/verify_gin.c @@ -0,0 +1,798 @@ +/*------------------------------------------------------------------------- + * + * verify_gin.c + * Verifies the integrity of GIN indexes based on invariants. + * + * + * GIN index verification checks a number of invariants: + * + * - consistency: Paths in GIN graph have to contain consistent keys: tuples + * on parent pages consistently include tuples from children pages. + * + * - graph invariants: Each internal page must have at least one downlink, and + * can reference either only leaf pages or only internal pages. 
+ * + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/verify_gin.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/gin_private.h" +#include "access/nbtree.h" +#include "catalog/pg_am.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "verify_common.h" +#include "string.h" + +/* + * GinScanItem represents one item of depth-first scan of the index. + */ +typedef struct GinScanItem +{ + int depth; + IndexTuple parenttup; + BlockNumber parentblk; + XLogRecPtr parentlsn; + BlockNumber blkno; + struct GinScanItem *next; +} GinScanItem; + +/* + * GinPostingTreeScanItem represents one item of a depth-first posting tree scan. + */ +typedef struct GinPostingTreeScanItem +{ + int depth; + ItemPointerData parentkey; + BlockNumber parentblk; + BlockNumber blkno; + struct GinPostingTreeScanItem *next; +} GinPostingTreeScanItem; + + +PG_FUNCTION_INFO_V1(gin_index_check); + +static void gin_check_parent_keys_consistency(Relation rel, + Relation heaprel, + void *callback_state, bool readonly); +static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo); +static IndexTuple gin_refind_parent(Relation rel, + BlockNumber parentblkno, + BlockNumber childblkno, + BufferAccessStrategy strategy); +static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block, Page page, + OffsetNumber offset); + +/* + * gin_index_check(index regclass) + * + * Verify integrity of GIN index. + * + * Acquires AccessShareLock on heap & index relations. + */ +Datum +gin_index_check(PG_FUNCTION_ARGS) +{ + Oid indrelid = PG_GETARG_OID(0); + + amcheck_lock_relation_and_check(indrelid, + GIN_AM_OID, + gin_check_parent_keys_consistency, + AccessShareLock, + NULL); + + PG_RETURN_VOID(); +} + +/* + * Read item pointers from leaf entry tuple. + * + * Returns a palloc'd array of ItemPointers. The number of items is returned + * in *nitems. 
+ */ +static ItemPointer +ginReadTupleWithoutState(IndexTuple itup, int *nitems) +{ + Pointer ptr = GinGetPosting(itup); + int nipd = GinGetNPosting(itup); + ItemPointer ipd; + int ndecoded; + + if (GinItupIsCompressed(itup)) + { + if (nipd > 0) + { + ipd = ginPostingListDecode((GinPostingList *) ptr, &ndecoded); + if (nipd != ndecoded) + elog(ERROR, "number of items mismatch in GIN entry tuple, %d in tuple header, %d decoded", + nipd, ndecoded); + } + else + ipd = palloc(0); + } + else + { + ipd = (ItemPointer) palloc(sizeof(ItemPointerData) * nipd); + memcpy(ipd, ptr, sizeof(ItemPointerData) * nipd); + } + *nitems = nipd; + return ipd; +} + +/* + * Scans through a posting tree (given by the root), and verifies that the keys + * on a child keys are consistent with the parent. + * + * Allocates a separate memory context and scans through posting tree graph. + */ +static void +gin_check_posting_tree_parent_keys_consistency(Relation rel, BlockNumber posting_tree_root) +{ + BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD); + GinPostingTreeScanItem *stack; + MemoryContext mctx; + MemoryContext oldcontext; + + int leafdepth; + + mctx = AllocSetContextCreate(CurrentMemoryContext, + "posting tree check context", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(mctx); + + /* + * We don't know the height of the tree yet, but as soon as we encounter a + * leaf page, we will set 'leafdepth' to its depth. 
+ */ + leafdepth = -1; + + /* Start the scan at the root page */ + stack = (GinPostingTreeScanItem *) palloc0(sizeof(GinPostingTreeScanItem)); + stack->depth = 0; + ItemPointerSetInvalid(&stack->parentkey); + stack->parentblk = InvalidBlockNumber; + stack->blkno = posting_tree_root; + + elog(DEBUG3, "processing posting tree at blk %u", posting_tree_root); + + while (stack) + { + GinPostingTreeScanItem *stack_next; + Buffer buffer; + Page page; + OffsetNumber i, + maxoff; + BlockNumber rightlink; + + CHECK_FOR_INTERRUPTS(); + + buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno, + RBM_NORMAL, strategy); + LockBuffer(buffer, GIN_SHARE); + page = (Page) BufferGetPage(buffer); + + Assert(GinPageIsData(page)); + + /* Check that the tree has the same height in all branches */ + if (GinPageIsLeaf(page)) + { + ItemPointerData minItem; + int nlist; + ItemPointerData *list; + char tidrange_buf[MAXPGPATH]; + + ItemPointerSetMin(&minItem); + + elog(DEBUG1, "page blk: %u, type leaf", stack->blkno); + + if (leafdepth == -1) + leafdepth = stack->depth; + else if (stack->depth != leafdepth) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u", + RelationGetRelationName(rel), stack->blkno))); + list = GinDataLeafPageGetItems(page, &nlist, minItem); + + if (nlist > 0) + snprintf(tidrange_buf, sizeof(tidrange_buf), + "%d tids (%u, %u) - (%u, %u)", + nlist, + ItemPointerGetBlockNumberNoCheck(&list[0]), + ItemPointerGetOffsetNumberNoCheck(&list[0]), + ItemPointerGetBlockNumberNoCheck(&list[nlist - 1]), + ItemPointerGetOffsetNumberNoCheck(&list[nlist - 1])); + else + snprintf(tidrange_buf, sizeof(tidrange_buf), "0 tids"); + + if (stack->parentblk != InvalidBlockNumber) + elog(DEBUG3, "blk %u: parent %u highkey (%u, %u), %s", + stack->blkno, + stack->parentblk, + ItemPointerGetBlockNumberNoCheck(&stack->parentkey), + ItemPointerGetOffsetNumberNoCheck(&stack->parentkey), + 
tidrange_buf); + else + elog(DEBUG3, "blk %u: root leaf, %s", + stack->blkno, + tidrange_buf); + + if (stack->parentblk != InvalidBlockNumber && + ItemPointerGetOffsetNumberNoCheck(&stack->parentkey) != InvalidOffsetNumber && + nlist > 0 && ItemPointerCompare(&stack->parentkey, &list[nlist - 1]) < 0) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\": tid exceeds parent's high key in postingTree leaf on block %u", + RelationGetRelationName(rel), stack->blkno))); + } + else + { + LocationIndex pd_lower; + ItemPointerData bound; + int lowersize; + + /* + * Check that tuples in each page are properly ordered and + * consistent with parent high key + */ + maxoff = GinPageGetOpaque(page)->maxoff; + rightlink = GinPageGetOpaque(page)->rightlink; + + elog(DEBUG1, "page blk: %u, type data, maxoff %d", stack->blkno, maxoff); + + if (stack->parentblk != InvalidBlockNumber) + elog(DEBUG3, "blk %u: internal posting tree page with %u items, parent %u highkey (%u, %u)", + stack->blkno, maxoff, stack->parentblk, + ItemPointerGetBlockNumberNoCheck(&stack->parentkey), + ItemPointerGetOffsetNumberNoCheck(&stack->parentkey)); + else + elog(DEBUG3, "blk %u: root internal posting tree page with %u items", + stack->blkno, maxoff); + + /* + * A GIN posting tree internal page stores PostingItems in the + * 'lower' part of the page. The 'upper' part is unused. The + * number of elements is stored in the opaque area (maxoff). Make + * sure the size of the 'lower' part agrees with 'maxoff' + * + * We didn't set pd_lower until PostgreSQL version 9.4, so if this + * check fails, it could also be because the index was + * binary-upgraded from an earlier version. That was a long time + * ago, though, so let's warn if it doesn't match. 
+ */ + pd_lower = ((PageHeader) page)->pd_lower; + lowersize = pd_lower - MAXALIGN(SizeOfPageHeaderData); + if ((lowersize - MAXALIGN(sizeof(ItemPointerData))) / sizeof(PostingItem) != maxoff) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has unexpected pd_lower %u in posting tree block %u with maxoff %u)", + RelationGetRelationName(rel), pd_lower, stack->blkno, maxoff))); + + /* + * Before the PostingItems, there's one ItemPointerData in the + * 'lower' part that stores the page's high key. + */ + bound = *GinDataPageGetRightBound(page); + + /* + * Gin page right bound has a sane value only when not a highkey on + * the rightmost page (at a given level). For the rightmost page does + * not store the highkey explicitly, and the value is infinity. + */ + if (ItemPointerIsValid(&stack->parentkey) && + rightlink != InvalidBlockNumber && + !ItemPointerEquals(&stack->parentkey, &bound)) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\": posting tree page's high key (%u, %u) doesn't match the downlink on block %u (parent blk %u, key (%u, %u))", + RelationGetRelationName(rel), + ItemPointerGetBlockNumberNoCheck(&bound), + ItemPointerGetOffsetNumberNoCheck(&bound), + stack->blkno, stack->parentblk, + ItemPointerGetBlockNumberNoCheck(&stack->parentkey), + ItemPointerGetOffsetNumberNoCheck(&stack->parentkey)))); + + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + GinPostingTreeScanItem *ptr; + PostingItem *posting_item = GinDataPageGetPostingItem(page, i); + + /* ItemPointerGetOffsetNumber expects a valid pointer */ + if (!(i == maxoff && + rightlink == InvalidBlockNumber)) + elog(DEBUG3, "key (%u, %u) -> %u", + ItemPointerGetBlockNumber(&posting_item->key), + ItemPointerGetOffsetNumber(&posting_item->key), + BlockIdGetBlockNumber(&posting_item->child_blkno)); + else + elog(DEBUG3, "key (%u, %u) -> %u", + 0, 0, BlockIdGetBlockNumber(&posting_item->child_blkno)); + + if (i == maxoff && rightlink 
== InvalidBlockNumber)
+				{
+					/*
+					 * The rightmost item in the tree level has (0, 0) as the
+					 * key
+					 */
+					if (ItemPointerGetBlockNumberNoCheck(&posting_item->key) != 0 ||
+						ItemPointerGetOffsetNumberNoCheck(&posting_item->key) != 0)
+						ereport(ERROR,
+								(errcode(ERRCODE_INDEX_CORRUPTED),
+								 errmsg("index \"%s\": rightmost posting tree page (blk %u) has unexpected last key (%u, %u)",
+										RelationGetRelationName(rel),
+										stack->blkno,
+										ItemPointerGetBlockNumberNoCheck(&posting_item->key),
+										ItemPointerGetOffsetNumberNoCheck(&posting_item->key))));
+				}
+				else if (i != FirstOffsetNumber)
+				{
+					PostingItem *previous_posting_item = GinDataPageGetPostingItem(page, i - 1);
+
+					if (ItemPointerCompare(&posting_item->key, &previous_posting_item->key) < 0)
+						ereport(ERROR,
+								(errcode(ERRCODE_INDEX_CORRUPTED),
+								 errmsg("index \"%s\" has wrong tuple order in posting tree, block %u, offset %u",
+										RelationGetRelationName(rel), stack->blkno, i)));
+				}
+
+				/*
+				 * Check if this tuple is consistent with the downlink in the
+				 * parent.
+				 */
+				if (stack->parentblk != InvalidBlockNumber && i == maxoff &&
+					ItemPointerCompare(&stack->parentkey, &posting_item->key) < 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_INDEX_CORRUPTED),
+							 errmsg("index \"%s\": posting item exceeds parent's high key in postingTree internal page on block %u offset %u",
+									RelationGetRelationName(rel),
+									stack->blkno, i)));
+
+				/* This is an internal page, recurse into the child. */
+				ptr = (GinPostingTreeScanItem *) palloc(sizeof(GinPostingTreeScanItem));
+				ptr->depth = stack->depth + 1;
+
+				/*
+				 * Set rightmost parent key to invalid item pointer. Its
+				 * value is 'Infinity' and not explicitly stored.
+ */ + if (rightlink == InvalidBlockNumber) + ItemPointerSetInvalid(&ptr->parentkey); + else + ptr->parentkey = posting_item->key; + + ptr->parentblk = stack->blkno; + ptr->blkno = BlockIdGetBlockNumber(&posting_item->child_blkno); + ptr->next = stack->next; + stack->next = ptr; + } + } + LockBuffer(buffer, GIN_UNLOCK); + ReleaseBuffer(buffer); + + /* Step to next item in the queue */ + stack_next = stack->next; + pfree(stack); + stack = stack_next; + } + + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(mctx); +} + +/* + * Main entry point for GIN checks. + * + * Allocates memory context and scans through the whole GIN graph. + */ +static void +gin_check_parent_keys_consistency(Relation rel, + Relation heaprel, + void *callback_state, + bool readonly) +{ + BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD); + GinScanItem *stack; + MemoryContext mctx; + MemoryContext oldcontext; + GinState state; + int leafdepth; + + mctx = AllocSetContextCreate(CurrentMemoryContext, + "amcheck consistency check context", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(mctx); + initGinState(&state, rel); + + /* + * We don't know the height of the tree yet, but as soon as we encounter a + * leaf page, we will set 'leafdepth' to its depth. 
+	 */
+	leafdepth = -1;
+
+	/* Start the scan at the root page */
+	stack = (GinScanItem *) palloc0(sizeof(GinScanItem));
+	stack->depth = 0;
+	stack->parenttup = NULL;
+	stack->parentblk = InvalidBlockNumber;
+	stack->parentlsn = InvalidXLogRecPtr;
+	stack->blkno = GIN_ROOT_BLKNO;
+
+	while (stack)
+	{
+		GinScanItem *stack_next;
+		Buffer		buffer;
+		Page		page;
+		OffsetNumber i,
+					maxoff,
+					prev_attnum;
+		XLogRecPtr	lsn;
+		IndexTuple	prev_tuple;
+		BlockNumber rightlink;
+
+		CHECK_FOR_INTERRUPTS();
+
+		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+									RBM_NORMAL, strategy);
+		LockBuffer(buffer, GIN_SHARE);
+		page = (Page) BufferGetPage(buffer);
+		lsn = BufferGetLSNAtomic(buffer);
+		maxoff = PageGetMaxOffsetNumber(page);
+		rightlink = GinPageGetOpaque(page)->rightlink;
+
+		/* Do basic sanity checks on the page headers */
+		check_index_page(rel, buffer, stack->blkno);
+
+		elog(DEBUG3, "processing entry tree page at blk %u, maxoff: %u", stack->blkno, maxoff);
+
+		/*
+		 * It's possible that the page was split since we looked at the
+		 * parent, so that we missed the downlink of the right sibling
+		 * when we scanned the parent.  If so, add the right sibling to the
+		 * stack now.
+ */ + if (stack->parenttup != NULL) + { + GinNullCategory parent_key_category; + Datum parent_key = gintuple_get_key(&state, + stack->parenttup, + &parent_key_category); + ItemId iid = PageGetItemIdCareful(rel, stack->blkno, + page, maxoff); + IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); + OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple); + GinNullCategory page_max_key_category; + Datum page_max_key = gintuple_get_key(&state, idxtuple, &page_max_key_category); + + if (rightlink != InvalidBlockNumber && + ginCompareEntries(&state, attnum, page_max_key, + page_max_key_category, parent_key, + parent_key_category) > 0) + { + /* split page detected, install right link to the stack */ + GinScanItem *ptr; + + elog(DEBUG3, "split detected for blk: %u, parent blk: %u", stack->blkno, stack->parentblk); + + ptr = (GinScanItem *) palloc(sizeof(GinScanItem)); + ptr->depth = stack->depth; + ptr->parenttup = CopyIndexTuple(stack->parenttup); + ptr->parentblk = stack->parentblk; + ptr->parentlsn = stack->parentlsn; + ptr->blkno = rightlink; + ptr->next = stack->next; + stack->next = ptr; + } + } + + /* Check that the tree has the same height in all branches */ + if (GinPageIsLeaf(page)) + { + if (leafdepth == -1) + leafdepth = stack->depth; + else if (stack->depth != leafdepth) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u", + RelationGetRelationName(rel), stack->blkno))); + } + + /* + * Check that tuples in each page are properly ordered and consistent + * with parent high key + */ + prev_tuple = NULL; + prev_attnum = InvalidAttrNumber; + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i); + IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); + OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple); + GinNullCategory prev_key_category; + Datum prev_key; + 
GinNullCategory current_key_category;
+			Datum		current_key;
+
+			if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+				ereport(ERROR,
+						(errcode(ERRCODE_INDEX_CORRUPTED),
+						 errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+								RelationGetRelationName(rel), stack->blkno, i)));
+
+			current_key = gintuple_get_key(&state, idxtuple, &current_key_category);
+
+			/*
+			 * First block is metadata, skip order check. Also, never check
+			 * for high key on rightmost page, as this key is not really
+			 * stored explicitly.
+			 *
+			 * Also make sure to not compare entries for different attnums, which
+			 * may be stored on the same page.
+			 */
+			if (i != FirstOffsetNumber && attnum == prev_attnum && stack->blkno != GIN_ROOT_BLKNO &&
+				!(i == maxoff && rightlink == InvalidBlockNumber))
+			{
+				prev_key = gintuple_get_key(&state, prev_tuple, &prev_key_category);
+				if (ginCompareEntries(&state, attnum, prev_key,
+									  prev_key_category, current_key,
+									  current_key_category) >= 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_INDEX_CORRUPTED),
+							 errmsg("index \"%s\" has wrong tuple order on entry tree page, block %u, offset %u, rightlink %u",
+									RelationGetRelationName(rel), stack->blkno, i, rightlink)));
+			}
+
+			/*
+			 * Check if this tuple is consistent with the downlink in the
+			 * parent.
+			 */
+			if (stack->parenttup &&
+				i == maxoff)
+			{
+				GinNullCategory parent_key_category;
+				Datum		parent_key = gintuple_get_key(&state,
+														  stack->parenttup,
+														  &parent_key_category);
+
+				if (ginCompareEntries(&state, attnum, current_key,
+									  current_key_category, parent_key,
+									  parent_key_category) > 0)
+				{
+					/*
+					 * There was a discrepancy between parent and child
+					 * tuples. We need to verify it is not a result of
+					 * concurrent call of ginPlaceToPage(). So, lock parent
+					 * and try to find downlink for current page. It may be
+					 * missing due to concurrent page split, this is OK.
+					 */
+					pfree(stack->parenttup);
+					stack->parenttup = gin_refind_parent(rel, stack->parentblk,
+														 stack->blkno, strategy);
+
+					/* Recheck the refound parent tuple, if any, before failing */
+					if (!stack->parenttup)
+						elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split",
+							 stack->blkno, stack->parentblk);
+					else
+					{
+						parent_key = gintuple_get_key(&state,
+													  stack->parenttup,
+													  &parent_key_category);
+
+						/*
+						 * Check if it is properly adjusted. If so,
+						 * proceed to the next key.
+						 */
+						if (ginCompareEntries(&state, attnum, current_key,
+											  current_key_category, parent_key,
+											  parent_key_category) > 0)
+							ereport(ERROR,
+									(errcode(ERRCODE_INDEX_CORRUPTED),
+									 errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+											RelationGetRelationName(rel), stack->blkno, i)));
+					}
+				}
+			}
+
+			/* If this is an internal page, recurse into the child */
+			if (!GinPageIsLeaf(page))
+			{
+				GinScanItem *ptr;
+
+				ptr = (GinScanItem *) palloc(sizeof(GinScanItem));
+				ptr->depth = stack->depth + 1;
+				/* last tuple in layer has no high key */
+				if (i != maxoff && !GinPageGetOpaque(page)->rightlink)
+					ptr->parenttup = CopyIndexTuple(idxtuple);
+				else
+					ptr->parenttup = NULL;
+				ptr->parentblk = stack->blkno;
+				ptr->blkno = GinGetDownlink(idxtuple);
+				ptr->parentlsn = lsn;
+				ptr->next = stack->next;
+				stack->next = ptr;
+			}
+			/* If this item is a pointer to a posting tree, recurse into it */
+			else if (GinIsPostingTree(idxtuple))
+			{
+				BlockNumber rootPostingTree = GinGetPostingTree(idxtuple);
+
+				gin_check_posting_tree_parent_keys_consistency(rel, rootPostingTree);
+			}
+			else
+			{
+				ItemPointer ipd;
+				int			nipd;
+
+				ipd = ginReadTupleWithoutState(idxtuple, &nipd);
+
+				for (int j = 0; j < nipd; j++)
+				{
+					if (!OffsetNumberIsValid(ItemPointerGetOffsetNumber(&ipd[j])))
+						ereport(ERROR,
+								(errcode(ERRCODE_INDEX_CORRUPTED),
+								 errmsg("index \"%s\": posting list contains invalid heap pointer on block %u",
+										RelationGetRelationName(rel), stack->blkno)));
+				}
+
pfree(ipd); + } + + prev_tuple = CopyIndexTuple(idxtuple); + prev_attnum = attnum; + } + + LockBuffer(buffer, GIN_UNLOCK); + ReleaseBuffer(buffer); + + /* Step to next item in the queue */ + stack_next = stack->next; + if (stack->parenttup) + pfree(stack->parenttup); + pfree(stack); + stack = stack_next; + } + + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(mctx); +} + +/* + * Verify that a freshly-read page looks sane. + */ +static void +check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo) +{ + Page page = BufferGetPage(buffer); + + /* + * ReadBuffer verifies that every newly-read page passes + * PageHeaderIsValid, which means it either contains a reasonably sane + * page header or is all-zero. We have to defend against the all-zero + * case, however. + */ + if (PageIsNew(page)) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" contains unexpected zero page at block %u", + RelationGetRelationName(rel), + BufferGetBlockNumber(buffer)), + errhint("Please REINDEX it."))); + + /* + * Additionally check that the special area looks sane. 
+ */ + if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GinPageOpaqueData))) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" contains corrupted page at block %u", + RelationGetRelationName(rel), + BufferGetBlockNumber(buffer)), + errhint("Please REINDEX it."))); + + if (GinPageIsDeleted(page)) + { + if (!GinPageIsLeaf(page)) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has deleted internal page %d", + RelationGetRelationName(rel), blockNo))); + if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has deleted page %d with tuples", + RelationGetRelationName(rel), blockNo))); + } + else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has page %d with exceeding count of tuples", + RelationGetRelationName(rel), blockNo))); +} + +/* + * Try to re-find downlink pointing to 'blkno', in 'parentblkno'. + * + * If found, returns a palloc'd copy of the downlink tuple. Otherwise, + * returns NULL. + */ +static IndexTuple +gin_refind_parent(Relation rel, BlockNumber parentblkno, + BlockNumber childblkno, BufferAccessStrategy strategy) +{ + Buffer parentbuf; + Page parentpage; + OffsetNumber o, + parent_maxoff; + IndexTuple result = NULL; + + parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL, + strategy); + + LockBuffer(parentbuf, GIN_SHARE); + parentpage = BufferGetPage(parentbuf); + + if (GinPageIsLeaf(parentpage)) + { + UnlockReleaseBuffer(parentbuf); + return result; + } + + parent_maxoff = PageGetMaxOffsetNumber(parentpage); + for (o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o)) + { + ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o); + IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid); + + if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno) + { + /* Found it! 
Make copy and return it */ + result = CopyIndexTuple(itup); + break; + } + } + + UnlockReleaseBuffer(parentbuf); + + return result; +} + +static ItemId +PageGetItemIdCareful(Relation rel, BlockNumber block, Page page, + OffsetNumber offset) +{ + ItemId itemid = PageGetItemId(page, offset); + + if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) > + BLCKSZ - MAXALIGN(sizeof(GinPageOpaqueData))) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("line pointer points past end of tuple space in index \"%s\"", + RelationGetRelationName(rel)), + errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.", + block, offset, ItemIdGetOffset(itemid), + ItemIdGetLength(itemid), + ItemIdGetFlags(itemid)))); + + /* + * Verify that line pointer isn't LP_REDIRECT or LP_UNUSED or LP_DEAD, + * since GIN never uses all three. Verify that line pointer has storage, + * too. + */ + if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) || + ItemIdIsDead(itemid) || ItemIdGetLength(itemid) == 0) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("invalid line pointer storage in index \"%s\"", + RelationGetRelationName(rel)), + errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.", + block, offset, ItemIdGetOffset(itemid), + ItemIdGetLength(itemid), + ItemIdGetFlags(itemid)))); + + return itemid; +} diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index d56eb7637d39..f11c43a0ed79 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -30,6 +30,7 @@ #include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" +#include "verify_common.h" #include "catalog/index.h" #include "catalog/pg_am.h" #include "catalog/pg_opfamily_d.h" @@ -159,14 +160,22 @@ typedef struct BtreeLastVisibleEntry ItemPointer tid; /* Heap tid */ } BtreeLastVisibleEntry; +/* + * arguments for the bt_index_check_callback callback + */ +typedef struct BTCallbackState +{ + bool 
parentcheck; + bool heapallindexed; + bool rootdescend; + bool checkunique; +} BTCallbackState; + PG_FUNCTION_INFO_V1(bt_index_check); PG_FUNCTION_INFO_V1(bt_index_parent_check); -static void bt_index_check_internal(Oid indrelid, bool parentcheck, - bool heapallindexed, bool rootdescend, - bool checkunique); -static inline void btree_index_checkable(Relation rel); -static inline bool btree_index_mainfork_expected(Relation rel); +static void bt_index_check_callback(Relation indrel, Relation heaprel, + void *state, bool readonly); static void bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, bool readonly, bool heapallindexed, bool rootdescend, bool checkunique); @@ -241,15 +250,21 @@ Datum bt_index_check(PG_FUNCTION_ARGS) { Oid indrelid = PG_GETARG_OID(0); - bool heapallindexed = false; - bool checkunique = false; + BTCallbackState args; + + args.heapallindexed = false; + args.rootdescend = false; + args.parentcheck = false; + args.checkunique = false; if (PG_NARGS() >= 2) - heapallindexed = PG_GETARG_BOOL(1); - if (PG_NARGS() == 3) - checkunique = PG_GETARG_BOOL(2); + args.heapallindexed = PG_GETARG_BOOL(1); + if (PG_NARGS() >= 3) + args.checkunique = PG_GETARG_BOOL(2); - bt_index_check_internal(indrelid, false, heapallindexed, false, checkunique); + amcheck_lock_relation_and_check(indrelid, BTREE_AM_OID, + bt_index_check_callback, + AccessShareLock, &args); PG_RETURN_VOID(); } @@ -267,18 +282,23 @@ Datum bt_index_parent_check(PG_FUNCTION_ARGS) { Oid indrelid = PG_GETARG_OID(0); - bool heapallindexed = false; - bool rootdescend = false; - bool checkunique = false; + BTCallbackState args; + + args.heapallindexed = false; + args.rootdescend = false; + args.parentcheck = true; + args.checkunique = false; if (PG_NARGS() >= 2) - heapallindexed = PG_GETARG_BOOL(1); + args.heapallindexed = PG_GETARG_BOOL(1); if (PG_NARGS() >= 3) - rootdescend = PG_GETARG_BOOL(2); - if (PG_NARGS() == 4) - checkunique = PG_GETARG_BOOL(3); + args.rootdescend = 
PG_GETARG_BOOL(2); + if (PG_NARGS() >= 4) + args.checkunique = PG_GETARG_BOOL(3); - bt_index_check_internal(indrelid, true, heapallindexed, rootdescend, checkunique); + amcheck_lock_relation_and_check(indrelid, BTREE_AM_OID, + bt_index_check_callback, + ShareLock, &args); PG_RETURN_VOID(); } @@ -287,193 +307,46 @@ bt_index_parent_check(PG_FUNCTION_ARGS) * Helper for bt_index_[parent_]check, coordinating the bulk of the work. */ static void -bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, - bool rootdescend, bool checkunique) +bt_index_check_callback(Relation indrel, Relation heaprel, void *state, bool readonly) { - Oid heapid; - Relation indrel; - Relation heaprel; - LOCKMODE lockmode; - Oid save_userid; - int save_sec_context; - int save_nestlevel; - - if (parentcheck) - lockmode = ShareLock; - else - lockmode = AccessShareLock; - - /* - * We must lock table before index to avoid deadlocks. However, if the - * passed indrelid isn't an index then IndexGetRelation() will fail. - * Rather than emitting a not-very-helpful error message, postpone - * complaining, expecting that the is-it-an-index test below will fail. - * - * In hot standby mode this will raise an error when parentcheck is true. - */ - heapid = IndexGetRelation(indrelid, true); - if (OidIsValid(heapid)) - { - heaprel = table_open(heapid, lockmode); - - /* - * Switch to the table owner's userid, so that any index functions are - * run as that user. Also lock down security-restricted operations - * and arrange to make GUC variable changes local to this command. 
- */ - GetUserIdAndSecContext(&save_userid, &save_sec_context); - SetUserIdAndSecContext(heaprel->rd_rel->relowner, - save_sec_context | SECURITY_RESTRICTED_OPERATION); - save_nestlevel = NewGUCNestLevel(); - RestrictSearchPath(); - } - else - { - heaprel = NULL; - /* Set these just to suppress "uninitialized variable" warnings */ - save_userid = InvalidOid; - save_sec_context = -1; - save_nestlevel = -1; - } + BTCallbackState *args = (BTCallbackState *) state; + bool heapkeyspace, + allequalimage; - /* - * Open the target index relations separately (like relation_openrv(), but - * with heap relation locked first to prevent deadlocking). In hot - * standby mode this will raise an error when parentcheck is true. - * - * There is no need for the usual indcheckxmin usability horizon test - * here, even in the heapallindexed case, because index undergoing - * verification only needs to have entries for a new transaction snapshot. - * (If this is a parentcheck verification, there is no question about - * committed or recently dead heap tuples lacking index entries due to - * concurrent activity.) - */ - indrel = index_open(indrelid, lockmode); - - /* - * Since we did the IndexGetRelation call above without any lock, it's - * barely possible that a race against an index drop/recreation could have - * netted us the wrong table. - */ - if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false)) + if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM)) ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_TABLE), - errmsg("could not open parent table of index \"%s\"", + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" lacks a main relation fork", RelationGetRelationName(indrel)))); - /* Relation suitable for checking as B-Tree? 
*/ - btree_index_checkable(indrel); - - if (btree_index_mainfork_expected(indrel)) + /* Extract metadata from metapage, and sanitize it in passing */ + _bt_metaversion(indrel, &heapkeyspace, &allequalimage); + if (allequalimage && !heapkeyspace) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version", + RelationGetRelationName(indrel)))); + if (allequalimage && !_bt_allequalimage(indrel, false)) { - bool heapkeyspace, - allequalimage; + bool has_interval_ops = false; - if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM)) - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("index \"%s\" lacks a main relation fork", - RelationGetRelationName(indrel)))); - - /* Extract metadata from metapage, and sanitize it in passing */ - _bt_metaversion(indrel, &heapkeyspace, &allequalimage); - if (allequalimage && !heapkeyspace) - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version", - RelationGetRelationName(indrel)))); - if (allequalimage && !_bt_allequalimage(indrel, false)) - { - bool has_interval_ops = false; - - for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(indrel); i++) - if (indrel->rd_opfamily[i] == INTERVAL_BTREE_FAM_OID) - has_interval_ops = true; - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe", - RelationGetRelationName(indrel)), - has_interval_ops - ? 
errhint("This is known of \"interval\" indexes last built on a version predating 2023-11.") - : 0)); - } - - /* Check index, possibly against table it is an index on */ - bt_check_every_level(indrel, heaprel, heapkeyspace, parentcheck, - heapallindexed, rootdescend, checkunique); + for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(indrel); i++) + if (indrel->rd_opfamily[i] == INTERVAL_BTREE_FAM_OID) + { + has_interval_ops = true; + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe", + RelationGetRelationName(indrel)), + has_interval_ops + ? errhint("This is known of \"interval\" indexes last built on a version predating 2023-11.") + : 0)); + } } - /* Roll back any GUC changes executed by index functions */ - AtEOXact_GUC(false, save_nestlevel); - - /* Restore userid and security context */ - SetUserIdAndSecContext(save_userid, save_sec_context); - - /* - * Release locks early. That's ok here because nothing in the called - * routines will trigger shared cache invalidations to be sent, so we can - * relax the usual pattern of only releasing locks after commit. - */ - index_close(indrel, lockmode); - if (heaprel) - table_close(heaprel, lockmode); -} - -/* - * Basic checks about the suitability of a relation for checking as a B-Tree - * index. - * - * NB: Intentionally not checking permissions, the function is normally not - * callable by non-superusers. If granted, it's useful to be able to check a - * whole cluster. 
- */ -static inline void -btree_index_checkable(Relation rel) -{ - if (rel->rd_rel->relkind != RELKIND_INDEX || - rel->rd_rel->relam != BTREE_AM_OID) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("only B-Tree indexes are supported as targets for verification"), - errdetail("Relation \"%s\" is not a B-Tree index.", - RelationGetRelationName(rel)))); - - if (RELATION_IS_OTHER_TEMP(rel)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot access temporary tables of other sessions"), - errdetail("Index \"%s\" is associated with temporary relation.", - RelationGetRelationName(rel)))); - - if (!rel->rd_index->indisvalid) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot check index \"%s\"", - RelationGetRelationName(rel)), - errdetail("Index is not valid."))); -} - -/* - * Check if B-Tree index relation should have a file for its main relation - * fork. Verification uses this to skip unlogged indexes when in hot standby - * mode, where there is simply nothing to verify. We behave as if the - * relation is empty. - * - * NB: Caller should call btree_index_checkable() before calling here. 
- */ -static inline bool -btree_index_mainfork_expected(Relation rel) -{ - if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED || - !RecoveryInProgress()) - return true; - - ereport(DEBUG1, - (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), - errmsg("cannot verify unlogged index \"%s\" during recovery, skipping", - RelationGetRelationName(rel)))); - - return false; + /* Check index, possibly against table it is an index on */ + bt_check_every_level(indrel, heaprel, heapkeyspace, readonly, + args->heapallindexed, args->rootdescend, args->checkunique); } /* diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml index a12aa3abf01a..98f836e15e79 100644 --- a/doc/src/sgml/amcheck.sgml +++ b/doc/src/sgml/amcheck.sgml @@ -188,6 +188,26 @@ ORDER BY c.relpages DESC LIMIT 10; + + + + gin_index_check(index regclass) returns void + + gin_index_check + + + + + + gin_index_check tests that its target GIN index + has consistent parent-child tuples relations (no parent tuples + require tuple adjustement) and page graph respects balanced-tree + invariants (internal pages reference only leaf page or only internal + pages). + + + + diff --git a/src/backend/access/gin/README b/src/backend/access/gin/README index b08073162128..742bcbad499f 100644 --- a/src/backend/access/gin/README +++ b/src/backend/access/gin/README @@ -237,10 +237,10 @@ GIN packs keys and downlinks into tuples in a different way. P_i is grouped with K_{i+1}. -Inf key is not needed. -There are couple of additional notes regarding K_{n+1} key. -1) In entry tree rightmost page, a key coupled with P_n doesn't really matter. +There are a couple of additional notes regarding K_{n+1} key. +1) In the entry tree on the rightmost page, a key coupled with P_n doesn't really matter. Highkey is assumed to be infinity. -2) In posting tree, a key coupled with P_n always doesn't matter. Highkey for +2) In the posting tree, a key coupled with P_n always doesn't matter. 
Highkey for non-rightmost pages is stored separately and accessed via GinDataPageGetRightBound(). diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 1279b69422a5..b66cecd87991 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -194,6 +194,7 @@ BOOLEAN BOX BTArrayKeyInfo BTBuildState +BTCallbackState BTCycleId BTDedupInterval BTDedupState @@ -1052,8 +1053,10 @@ GinPageOpaque GinPageOpaqueData GinPlaceToPageRC GinPostingList +GinPostingTreeScanItem GinQualCounts GinScanEntry +GinScanItem GinScanKey GinScanOpaque GinScanOpaqueData