summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Haas2016-06-15 18:33:58 +0000
committerRobert Haas2016-06-15 18:33:58 +0000
commite472ce9624e0f2083c8fd25ea1acb081be908f8f (patch)
tree3b72ed0481ab470b40b0ec8a84450665559fdd94
parent38e9f90a227d1e60e7b4691d1a71fefaba6059e5 (diff)
Add integrity-checking functions to pg_visibility.
The new pg_check_visible() and pg_check_frozen() functions can be used to verify that the visibility map bits for a relation's data pages match the actual state of the tuples on those pages. Amit Kapila and Robert Haas, reviewed (in earlier versions) by Andres Freund. Additional testing help by Thomas Munro.
-rw-r--r--contrib/pg_visibility/Makefile2
-rw-r--r--contrib/pg_visibility/pg_visibility--1.0--1.1.sql17
-rw-r--r--contrib/pg_visibility/pg_visibility--1.1.sql (renamed from contrib/pg_visibility/pg_visibility--1.0.sql)17
-rw-r--r--contrib/pg_visibility/pg_visibility.c313
-rw-r--r--contrib/pg_visibility/pg_visibility.control2
-rw-r--r--doc/src/sgml/pgvisibility.sgml28
-rw-r--r--src/tools/pgindent/typedefs.list1
7 files changed, 376 insertions, 4 deletions
diff --git a/contrib/pg_visibility/Makefile b/contrib/pg_visibility/Makefile
index fbbaa2e512b..379591a0984 100644
--- a/contrib/pg_visibility/Makefile
+++ b/contrib/pg_visibility/Makefile
@@ -4,7 +4,7 @@ MODULE_big = pg_visibility
OBJS = pg_visibility.o $(WIN32RES)
EXTENSION = pg_visibility
-DATA = pg_visibility--1.0.sql
+DATA = pg_visibility--1.1.sql pg_visibility--1.0--1.1.sql
PGFILEDESC = "pg_visibility - page visibility information"
ifdef USE_PGXS
diff --git a/contrib/pg_visibility/pg_visibility--1.0--1.1.sql b/contrib/pg_visibility/pg_visibility--1.0--1.1.sql
new file mode 100644
index 00000000000..2c97dfd03c2
--- /dev/null
+++ b/contrib/pg_visibility/pg_visibility--1.0--1.1.sql
@@ -0,0 +1,17 @@
+/* contrib/pg_visibility/pg_visibility--1.0--1.1.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_visibility UPDATE TO '1.1'" to load this file. \quit
+
+CREATE FUNCTION pg_check_frozen(regclass, t_ctid OUT tid)
+RETURNS SETOF tid
+AS 'MODULE_PATHNAME', 'pg_check_frozen'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION pg_check_visible(regclass, t_ctid OUT tid)
+RETURNS SETOF tid
+AS 'MODULE_PATHNAME', 'pg_check_visible'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION pg_check_frozen(regclass) FROM PUBLIC;
+REVOKE ALL ON FUNCTION pg_check_visible(regclass) FROM PUBLIC;
diff --git a/contrib/pg_visibility/pg_visibility--1.0.sql b/contrib/pg_visibility/pg_visibility--1.1.sql
index da511e5be98..b49b644996f 100644
--- a/contrib/pg_visibility/pg_visibility--1.0.sql
+++ b/contrib/pg_visibility/pg_visibility--1.1.sql
@@ -1,4 +1,4 @@
-/* contrib/pg_visibility/pg_visibility--1.0.sql */
+/* contrib/pg_visibility/pg_visibility--1.1.sql */
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION pg_visibility" to load this file. \quit
@@ -44,9 +44,24 @@ RETURNS record
AS 'MODULE_PATHNAME', 'pg_visibility_map_summary'
LANGUAGE C STRICT;
+-- Show tupleids of non-frozen tuples if any in all_frozen pages
+-- for a relation.
+CREATE FUNCTION pg_check_frozen(regclass, t_ctid OUT tid)
+RETURNS SETOF tid
+AS 'MODULE_PATHNAME', 'pg_check_frozen'
+LANGUAGE C STRICT;
+
+-- Show tupleids of not-all-visible tuples, if any, in all_visible pages for a relation.
+CREATE FUNCTION pg_check_visible(regclass, t_ctid OUT tid)
+RETURNS SETOF tid
+AS 'MODULE_PATHNAME', 'pg_check_visible'
+LANGUAGE C STRICT;
+
-- Don't want these to be available to public.
REVOKE ALL ON FUNCTION pg_visibility_map(regclass, bigint) FROM PUBLIC;
REVOKE ALL ON FUNCTION pg_visibility(regclass, bigint) FROM PUBLIC;
REVOKE ALL ON FUNCTION pg_visibility_map(regclass) FROM PUBLIC;
REVOKE ALL ON FUNCTION pg_visibility(regclass) FROM PUBLIC;
REVOKE ALL ON FUNCTION pg_visibility_map_summary(regclass) FROM PUBLIC;
+REVOKE ALL ON FUNCTION pg_check_frozen(regclass) FROM PUBLIC;
+REVOKE ALL ON FUNCTION pg_check_visible(regclass) FROM PUBLIC;
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index 9edf239819c..abb92f388a3 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -14,6 +14,7 @@
#include "funcapi.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
+#include "storage/procarray.h"
#include "utils/rel.h"
PG_MODULE_MAGIC;
@@ -25,14 +26,28 @@ typedef struct vbits
uint8 bits[FLEXIBLE_ARRAY_MEMBER];
} vbits;
+typedef struct corrupt_items /* result set built by collect_corrupt_items() and drained by the pg_check_* SRFs */
+{
+ BlockNumber next; /* while collecting: next free slot in tids (= items found so far); afterwards: next item to return */
+ BlockNumber count; /* while collecting: number of slots allocated in tids; afterwards: number of items recorded */
+ ItemPointer tids; /* palloc'd array of recorded TIDs; enlarged on demand by record_corrupt_item() */
+} corrupt_items;
+
PG_FUNCTION_INFO_V1(pg_visibility_map);
PG_FUNCTION_INFO_V1(pg_visibility_map_rel);
PG_FUNCTION_INFO_V1(pg_visibility);
PG_FUNCTION_INFO_V1(pg_visibility_rel);
PG_FUNCTION_INFO_V1(pg_visibility_map_summary);
+PG_FUNCTION_INFO_V1(pg_check_frozen);
+PG_FUNCTION_INFO_V1(pg_check_visible);
static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd);
static vbits *collect_visibility_data(Oid relid, bool include_pd);
+static corrupt_items *collect_corrupt_items(Oid relid, bool all_visible,
+ bool all_frozen);
+static void record_corrupt_item(corrupt_items *items, ItemPointer tid);
+static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin,
+ Buffer buffer);
/*
* Visibility map information for a single block of a relation.
@@ -259,6 +274,68 @@ pg_visibility_map_summary(PG_FUNCTION_ARGS)
}
/*
+ * Return the TIDs of non-frozen tuples present in pages marked all-frozen
+ * in the visibility map. We hope no one will ever find any, but there could
+ * be bugs, database corruption, etc.
+ */
+Datum
+pg_check_frozen(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *fctx;
+	corrupt_items *found;
+
+	/* First call: scan the relation once and stash the complete result. */
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext callercxt;
+		Oid			relid = PG_GETARG_OID(0);
+
+		fctx = SRF_FIRSTCALL_INIT();
+
+		/* Build the result in the multi-call context so later calls can read it. */
+		callercxt = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+		fctx->user_fctx = collect_corrupt_items(relid, false, true);
+		MemoryContextSwitchTo(callercxt);
+	}
+
+	fctx = SRF_PERCALL_SETUP();
+	found = (corrupt_items *) fctx->user_fctx;
+
+	/* Every collected TID has been handed back: finish the set. */
+	if (found->next >= found->count)
+		SRF_RETURN_DONE(fctx);
+
+	/* Return the next TID and advance the read position. */
+	SRF_RETURN_NEXT(fctx, PointerGetDatum(&found->tids[found->next++]));
+}
+
+/*
+ * Set-returning function: emit the TID of every tuple that is not
+ * all-visible even though its page is marked all-visible in the visibility
+ * map.  An empty result is the expected outcome; anything else points at a
+ * bug or at database corruption.
+ */
+Datum
+pg_check_visible(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *fctx;
+	corrupt_items *found;
+
+	/* First call: scan the relation once and stash the complete result. */
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext callercxt;
+		Oid			relid = PG_GETARG_OID(0);
+
+		fctx = SRF_FIRSTCALL_INIT();
+
+		/* Build the result in the multi-call context so later calls can read it. */
+		callercxt = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+		fctx->user_fctx = collect_corrupt_items(relid, true, false);
+		MemoryContextSwitchTo(callercxt);
+	}
+
+	fctx = SRF_PERCALL_SETUP();
+	found = (corrupt_items *) fctx->user_fctx;
+
+	/* Every collected TID has been handed back: finish the set. */
+	if (found->next >= found->count)
+		SRF_RETURN_DONE(fctx);
+
+	/* Return the next TID and advance the read position. */
+	SRF_RETURN_NEXT(fctx, PointerGetDatum(&found->tids[found->next++]));
+}
+
+/*
* Helper function to construct whichever TupleDesc we need for a particular
* call.
*/
@@ -348,3 +425,239 @@ collect_visibility_data(Oid relid, bool include_pd)
return info;
}
+
+/*
+ * Returns a list of items whose visibility map information does not match
+ * the status of the tuples on the page.
+ *
+ * If all_visible is passed as true, this will include all items which are
+ * on pages marked as all-visible in the visibility map but which do not
+ * seem to in fact be all-visible.
+ *
+ * If all_frozen is passed as true, this will include all items which are
+ * on pages marked as all-frozen but which do not seem to in fact be frozen.
+ *
+ * Every reported TID is the (block, offset) of the line pointer that was
+ * actually examined.  We deliberately do not report the tuple header's
+ * t_ctid: for an updated tuple that field points at the newer version, and
+ * it is read from the very page contents we suspect of being corrupt.
+ *
+ * The result is allocated in the caller's current memory context.  On
+ * return, "next" is 0 and "count" is the number of TIDs stored in "tids".
+ *
+ * Raises an error if relid is not a table, materialized view, or TOAST
+ * table.
+ */
+static corrupt_items *
+collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
+{
+	Relation	rel;
+	BlockNumber nblocks;
+	corrupt_items *items;
+	BlockNumber blkno;
+	Buffer		vmbuffer = InvalidBuffer;
+	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
+	TransactionId OldestXmin = InvalidTransactionId;
+
+	if (all_visible)
+	{
+		/* Don't pass rel; that will fail in recovery. */
+		OldestXmin = GetOldestXmin(NULL, true);
+	}
+
+	rel = relation_open(relid, AccessShareLock);
+
+	if (rel->rd_rel->relkind != RELKIND_RELATION &&
+		rel->rd_rel->relkind != RELKIND_MATVIEW &&
+		rel->rd_rel->relkind != RELKIND_TOASTVALUE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a table, materialized view, or TOAST table",
+						RelationGetRelationName(rel))));
+
+	nblocks = RelationGetNumberOfBlocks(rel);
+
+	/*
+	 * Guess an initial array size. We don't expect many corrupted tuples, so
+	 * start with a small array.  This function uses the "next" field to track
+	 * the next offset where we can store an item (which is the same thing as
+	 * the number of items found so far) and the "count" field to track the
+	 * number of entries allocated.  We'll repurpose these fields before
+	 * returning.
+	 */
+	items = palloc0(sizeof(corrupt_items));
+	items->next = 0;
+	items->count = 64;
+	items->tids = palloc(items->count * sizeof(ItemPointerData));
+
+	/* Loop over every block in the relation. */
+	for (blkno = 0; blkno < nblocks; ++blkno)
+	{
+		bool		check_frozen = false;
+		bool		check_visible = false;
+		Buffer		buffer;
+		Page		page;
+		OffsetNumber offnum,
+					maxoff;
+
+		/* Make sure we are interruptible. */
+		CHECK_FOR_INTERRUPTS();
+
+		/* Use the visibility map to decide whether to check this page. */
+		if (all_frozen && VM_ALL_FROZEN(rel, blkno, &vmbuffer))
+			check_frozen = true;
+		if (all_visible && VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
+			check_visible = true;
+		if (!check_visible && !check_frozen)
+			continue;
+
+		/* Read and lock the page. */
+		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
+									bstrategy);
+		LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+		page = BufferGetPage(buffer);
+		maxoff = PageGetMaxOffsetNumber(page);
+
+		/*
+		 * The visibility map bits might have changed while we were acquiring
+		 * the page lock.  Recheck to avoid returning spurious results.
+		 */
+		if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
+			check_frozen = false;
+		if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
+			check_visible = false;
+		if (!check_visible && !check_frozen)
+		{
+			UnlockReleaseBuffer(buffer);
+			continue;
+		}
+
+		/* Iterate over each tuple on the page. */
+		for (offnum = FirstOffsetNumber;
+			 offnum <= maxoff;
+			 offnum = OffsetNumberNext(offnum))
+		{
+			HeapTupleData tuple;
+			ItemId		itemid;
+
+			itemid = PageGetItemId(page, offnum);
+
+			/* Unused or redirect line pointers are of no interest. */
+			if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
+				continue;
+
+			/* Dead line pointers are neither all-visible nor frozen. */
+			if (ItemIdIsDead(itemid))
+			{
+				ItemPointerData tid;
+
+				ItemPointerSet(&tid, blkno, offnum);
+				record_corrupt_item(items, &tid);
+				continue;
+			}
+
+			/*
+			 * Initialize a HeapTupleData structure for checks below.  t_self
+			 * is set to the tuple's own location so that it is valid for the
+			 * visibility routines and so that we report this tuple's TID
+			 * rather than whatever its t_ctid link happens to contain.
+			 */
+			ItemPointerSet(&(tuple.t_self), blkno, offnum);
+			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+			tuple.t_len = ItemIdGetLength(itemid);
+			tuple.t_tableOid = relid;
+
+			/*
+			 * If we're checking whether the page is all-visible, we expect
+			 * the tuple to be all-visible.
+			 */
+			if (check_visible &&
+				!tuple_all_visible(&tuple, OldestXmin, buffer))
+			{
+				TransactionId RecomputedOldestXmin;
+
+				/*
+				 * Time has passed since we computed OldestXmin, so it's
+				 * possible that this tuple is all-visible in reality even
+				 * though it doesn't appear so based on our
+				 * previously-computed value.  Let's compute a new value so we
+				 * can be certain whether there is a problem.
+				 *
+				 * From a concurrency point of view, it sort of sucks to
+				 * retake ProcArrayLock here while we're holding the buffer
+				 * share-locked, but it should be safe against deadlocks,
+				 * because surely GetOldestXmin() should never take a buffer
+				 * lock.  And this shouldn't happen often, so it's worth being
+				 * careful so as to avoid false positives.
+				 */
+				RecomputedOldestXmin = GetOldestXmin(NULL, true);
+
+				if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
+					record_corrupt_item(items, &tuple.t_self);
+				else
+				{
+					OldestXmin = RecomputedOldestXmin;
+					if (!tuple_all_visible(&tuple, OldestXmin, buffer))
+						record_corrupt_item(items, &tuple.t_self);
+				}
+			}
+
+			/*
+			 * If we're checking whether the page is all-frozen, we expect the
+			 * tuple to be in a state where it will never need freezing.
+			 */
+			if (check_frozen)
+			{
+				if (heap_tuple_needs_eventual_freeze(tuple.t_data))
+					record_corrupt_item(items, &tuple.t_self);
+			}
+		}
+
+		UnlockReleaseBuffer(buffer);
+	}
+
+	/* Clean up. */
+	if (vmbuffer != InvalidBuffer)
+		ReleaseBuffer(vmbuffer);
+	relation_close(rel, AccessShareLock);
+
+	/*
+	 * Before returning, repurpose the fields to match caller's expectations.
+	 * next is now the next item that should be read (rather than written) and
+	 * count is now the number of items we wrote (rather than the number we
+	 * allocated).
+	 */
+	items->count = items->next;
+	items->next = 0;
+
+	return items;
+}
+
+/*
+ * Append one TID to the result array, doubling the array's capacity first
+ * if it is already full.  During collection "next" is the write position
+ * and "count" is the allocated capacity.
+ */
+static void
+record_corrupt_item(corrupt_items *items, ItemPointer tid)
+{
+	/* Out of room: double the capacity before storing. */
+	if (items->next >= items->count)
+	{
+		Size		newbytes;
+
+		items->count = items->count * 2;
+		newbytes = items->count * sizeof(ItemPointerData);
+		items->tids = repalloc(items->tids, newbytes);
+	}
+
+	/* Copy the TID into the next free slot, then advance the write position. */
+	items->tids[items->next] = *tid;
+	items->next++;
+}
+
+/*
+ * Decide whether a tuple counts as all-visible with respect to the supplied
+ * OldestXmin.  The caller must hold a pin and a lock on the buffer that
+ * contains the tuple.
+ */
+static bool
+tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
+{
+	TransactionId inserter;
+
+	/* Only a live tuple can be all-visible. */
+	if (HeapTupleSatisfiesVacuum(tup, OldestXmin, buffer) != HEAPTUPLE_LIVE)
+		return false;
+
+	/*
+	 * lazy_scan_heap and heap_page_is_all_visible only mark a page
+	 * all-visible when every tuple is hinted committed, but a crash can lose
+	 * those hint bits, so we cannot rely on finding them set here.  Test the
+	 * xmin instead: it must be old enough for everyone to see.
+	 */
+	inserter = HeapTupleHeaderGetXmin(tup->t_data);
+
+	return TransactionIdPrecedes(inserter, OldestXmin);
+}
diff --git a/contrib/pg_visibility/pg_visibility.control b/contrib/pg_visibility/pg_visibility.control
index 1d7185351ed..f93ed0176ec 100644
--- a/contrib/pg_visibility/pg_visibility.control
+++ b/contrib/pg_visibility/pg_visibility.control
@@ -1,5 +1,5 @@
# pg_visibility extension
comment = 'examine the visibility map (VM) and page-level visibility info'
-default_version = '1.0'
+default_version = '1.1'
module_pathname = '$libdir/pg_visibility'
relocatable = true
diff --git a/doc/src/sgml/pgvisibility.sgml b/doc/src/sgml/pgvisibility.sgml
index 48b003d1516..4cdca7dada9 100644
--- a/doc/src/sgml/pgvisibility.sgml
+++ b/doc/src/sgml/pgvisibility.sgml
@@ -32,7 +32,8 @@
Functions which display information about <literal>PD_ALL_VISIBLE</>
are much more costly than those which only consult the visibility map,
because they must read the relation's data blocks rather than only the
- (much smaller) visibility map.
+ (much smaller) visibility map. Functions that check the relation's
+ data blocks are similarly expensive.
</para>
<sect2>
@@ -92,6 +93,31 @@
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><function>pg_check_frozen(regclass, t_ctid OUT tid) returns setof tid</function></term>
+
+ <listitem>
+ <para>
+ Returns the TIDs of non-frozen tuples present in pages marked all-frozen
+ in the visibility map. If this function returns a non-empty set of
+ TIDs, the database is corrupt.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><function>pg_check_visible(regclass, t_ctid OUT tid) returns setof tid</function></term>
+
+ <listitem>
+ <para>
+ Returns the TIDs of tuples which are not all-visible despite the fact
+ that the pages which contain them are marked as all-visible in the
+ visibility map. If this function returns a non-empty set of TIDs, the
+ database is corrupt.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
<para>
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9aa29f6a953..0c61fc287ad 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2372,6 +2372,7 @@ convert_testexpr_context
core_YYSTYPE
core_yy_extra_type
core_yyscan_t
+corrupt_items
cost_qual_eval_context
count_agg_clauses_context
create_upper_paths_hook_type