| author | Masahiko Sawada | 2024-04-02 01:15:37 +0000 |
|---|---|---|
| committer | Masahiko Sawada | 2024-04-02 01:15:37 +0000 |
| commit | 667e65aac354975c6f8090c6146fceb8d7b762d6 | |
| tree | ecefa5c922788f32aa643e4639561d7ba9ac1801 /src/backend/commands/vacuum.c | |
| parent | d5d2205c8ddc6670fa87474e172fdfab162b7a73 | |
Use TidStore for dead tuple TIDs storage during lazy vacuum.
Previously, we used a simple array for storing dead tuple IDs during
lazy vacuum, which had a number of problems:
* The array used a single allocation and so was limited to 1GB.
* The allocation was pessimistically sized according to table size
  (see the sketch after this list).
* Lookup with binary search was slow because of poor CPU cache and
branch prediction behavior.
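
For scale, here is a back-of-the-envelope sketch (standalone C, not
PostgreSQL source) of the old sizing rule. The 291 tuples-per-page
figure assumes 8kB pages, and the real code also clamped the result
to maintenance_work_mem, a detail omitted here:

```c
/*
 * Sketch of the old dead-TID array sizing: assume every tuple on every
 * page could die, then cap the single allocation at ~1GB (MaxAllocSize).
 * Constants assume 8kB pages; the maintenance_work_mem clamp is omitted.
 */
#include <stdio.h>
#include <stdint.h>

#define TID_BYTES 6                          /* sizeof(ItemPointerData) */
#define MAX_HEAP_TUPLES_PER_PAGE 291         /* 8kB pages, default build */
#define MAX_ALLOC (1024UL * 1024 * 1024 - 1) /* MaxAllocSize, ~1GB */

int
main(void)
{
	uint64_t	nblocks = 1000000;	/* an ~8GB table at 8kB per page */

	/* pessimistic: every tuple on every page might be dead */
	uint64_t	max_items = nblocks * MAX_HEAP_TUPLES_PER_PAGE;
	uint64_t	bytes = max_items * TID_BYTES;

	if (bytes > MAX_ALLOC)
		bytes = MAX_ALLOC;			/* single palloc capped at ~1GB */

	printf("pessimistic allocation: %.1f MB for %llu possible TIDs\n",
		   bytes / (1024.0 * 1024.0), (unsigned long long) max_items);
	return 0;
}
```

For this example table the array hits the 1GB ceiling even though far
fewer tuples are typically dead, which is the waste the TID store
removes.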
This commit replaces that array with the TID store from commit
30e144287a.
Since the backing radix tree makes small allocations as needed, the
1GB limit is now gone. Further, the total memory used is now often
smaller by an order of magnitude or more, depending on the
distribution of blocks and offsets. These two features should make
multiple rounds of heap scanning and index cleanup an extremely rare
event. TID lookup during index cleanup is also several times faster,
even more so when index order is correlated with heap tuple order.
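
To make that concrete, here is a minimal toy sketch in plain C (not
the real TidStore API): dead TIDs are keyed by block number, and the
offsets on each block collapse into one bitmap. The real store keeps
these bitmaps as values in a radix tree rather than the flat array
used below, but the behavior illustrated is the same: clustered dead
tuples share a single per-block entry, and membership is a direct
probe instead of a binary search over 6-byte TIDs.

```c
/*
 * Toy model of the TID store layout: block number -> bitmap of dead
 * offsets.  A flat array stands in for the radix tree to keep the
 * example self-contained and runnable.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NBLOCKS 1024				/* toy heap size, in blocks */

/* 5 * 64 = 320 bits, enough for 291 possible offsets per 8kB page */
static uint64_t offsets_bitmap[NBLOCKS][5];

static void
set_dead(uint32_t blkno, uint16_t offnum)
{
	offsets_bitmap[blkno][offnum / 64] |= UINT64_C(1) << (offnum % 64);
}

static bool
is_dead(uint32_t blkno, uint16_t offnum)
{
	return (offsets_bitmap[blkno][offnum / 64] >> (offnum % 64)) & 1;
}

int
main(void)
{
	/* 100 dead tuples on block 7 cost one bitmap, not 100 * 6 bytes */
	for (uint16_t off = 1; off <= 100; off++)
		set_dead(7, off);

	printf("block 7, offset 42 dead? %s\n", is_dead(7, 42) ? "yes" : "no");
	printf("block 8, offset 42 dead? %s\n", is_dead(8, 42) ? "yes" : "no");
	return 0;
}
```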
Since there is no longer a predictable relationship between the number
of dead tuples vacuumed and the space taken up by their TIDs, the
number of tuples no longer provides any meaningful insights for users,
nor is the maximum number predictable. For that reason this commit
also changes to byte-based progress reporting, with the relevant
columns of pg_stat_progress_vacuum renamed accordingly to
max_dead_tuple_bytes and dead_tuple_bytes.
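
The reporting side then amounts to sampling the store's footprint.
Below is a hedged sketch, assuming the PROGRESS_VACUUM_DEAD_TUPLE_BYTES
constant and the TidStoreMemoryUsage() call from the related commits;
it mirrors the plausible update path rather than quoting lazy vacuum
verbatim:

```c
/*
 * Hedged sketch, not a verbatim excerpt: after dead TIDs are added to
 * the store, report its actual memory consumption instead of a tuple
 * count.  Header paths and exact names are assumptions.
 */
#include "postgres.h"

#include "access/tidstore.h"
#include "commands/progress.h"
#include "utils/backend_progress.h"

static void
report_dead_tuple_bytes(TidStore *dead_items)
{
	/* surfaces as pg_stat_progress_vacuum.dead_tuple_bytes */
	pgstat_progress_update_param(PROGRESS_VACUUM_DEAD_TUPLE_BYTES,
								 TidStoreMemoryUsage(dead_items));
}
```

While a vacuum runs, the renamed columns can be watched with
`SELECT dead_tuple_bytes, max_dead_tuple_bytes FROM pg_stat_progress_vacuum;`.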
For parallel vacuum, both the TID store and supplemental information
specific to vacuum are shared among the parallel vacuum workers. As
with the previous array, we don't take any locks on TidStore during
parallel vacuum since writes are still only done by the leader
process.
Bump catalog version.
Reviewed-by: John Naylor, (in an earlier version) Dilip Kumar
Discussion: https://postgr.es/m/CAD21AoAfOZvmfR0j8VmZorZjL7RhTiQdVttNuC4W-Shdc2a-AA%40mail.gmail.com
Diffstat (limited to 'src/backend/commands/vacuum.c')
| -rw-r--r-- | src/backend/commands/vacuum.c | 78 |
1 file changed, 5 insertions, 73 deletions
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index e63c86cae45..b589279d49f 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -116,7 +116,6 @@ static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 static double compute_parallel_delay(void);
 static VacOptValue get_vacoptval_from_boolean(DefElem *def);
 static bool vac_tid_reaped(ItemPointer itemptr, void *state);
-static int vac_cmp_itemptr(const void *left, const void *right);
 
 /*
  * GUC check function to ensure GUC value specified is within the allowable
@@ -2489,16 +2488,16 @@ get_vacoptval_from_boolean(DefElem *def)
  */
 IndexBulkDeleteResult *
 vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
-                      VacDeadItems *dead_items)
+                      TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
 {
     /* Do bulk deletion */
     istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
                               (void *) dead_items);
 
     ereport(ivinfo->message_level,
-            (errmsg("scanned index \"%s\" to remove %d row versions",
+            (errmsg("scanned index \"%s\" to remove %lld row versions",
                     RelationGetRelationName(ivinfo->index),
-                    dead_items->num_items)));
+                    (long long) dead_items_info->num_items)));
 
     return istat;
 }
@@ -2530,81 +2529,14 @@ vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
 }
 
 /*
- * Returns the total required space for VACUUM's dead_items array given a
- * max_items value.
- */
-Size
-vac_max_items_to_alloc_size(int max_items)
-{
-    Assert(max_items <= MAXDEADITEMS(MaxAllocSize));
-
-    return offsetof(VacDeadItems, items) + sizeof(ItemPointerData) * max_items;
-}
-
-/*
  * vac_tid_reaped() -- is a particular tid deletable?
  *
  *     This has the right signature to be an IndexBulkDeleteCallback.
- *
- *     Assumes dead_items array is sorted (in ascending TID order).
  */
 static bool
 vac_tid_reaped(ItemPointer itemptr, void *state)
 {
-    VacDeadItems *dead_items = (VacDeadItems *) state;
-    int64       litem,
-                ritem,
-                item;
-    ItemPointer res;
-
-    litem = itemptr_encode(&dead_items->items[0]);
-    ritem = itemptr_encode(&dead_items->items[dead_items->num_items - 1]);
-    item = itemptr_encode(itemptr);
-
-    /*
-     * Doing a simple bound check before bsearch() is useful to avoid the
-     * extra cost of bsearch(), especially if dead items on the heap are
-     * concentrated in a certain range.  Since this function is called for
-     * every index tuple, it pays to be really fast.
-     */
-    if (item < litem || item > ritem)
-        return false;
-
-    res = (ItemPointer) bsearch(itemptr,
-                                dead_items->items,
-                                dead_items->num_items,
-                                sizeof(ItemPointerData),
-                                vac_cmp_itemptr);
-
-    return (res != NULL);
-}
-
-/*
- * Comparator routines for use with qsort() and bsearch().
- */
-static int
-vac_cmp_itemptr(const void *left, const void *right)
-{
-    BlockNumber lblk,
-                rblk;
-    OffsetNumber loff,
-                 roff;
-
-    lblk = ItemPointerGetBlockNumber((ItemPointer) left);
-    rblk = ItemPointerGetBlockNumber((ItemPointer) right);
-
-    if (lblk < rblk)
-        return -1;
-    if (lblk > rblk)
-        return 1;
-
-    loff = ItemPointerGetOffsetNumber((ItemPointer) left);
-    roff = ItemPointerGetOffsetNumber((ItemPointer) right);
-
-    if (loff < roff)
-        return -1;
-    if (loff > roff)
-        return 1;
+    TidStore   *dead_items = (TidStore *) state;
 
-    return 0;
+    return TidStoreIsMember(dead_items, itemptr);
 }
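
As a closing illustration of why the one-line body above suffices:
index_bulk_delete() drives a callback of exactly this shape once per
index tuple. The sketch below is plain C with stand-in types, not an
excerpt from any index AM, but it shows the calling pattern in which
vac_tid_reaped() now answers each probe with a single set-membership
test instead of a bsearch():

```c
/*
 * Minimal illustration of the IndexBulkDeleteCallback pattern: the
 * index AM walks its tuples and asks the callback whether each heap
 * TID is dead.  Types and the "dead" rule are stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct { unsigned blkno; unsigned offnum; } Tid;

/* mirrors IndexBulkDeleteCallback: (tid, opaque state) -> deletable? */
typedef bool (*BulkDeleteCallback) (const Tid *tid, void *state);

/* stand-in for TidStoreIsMember(): here, "dead" means an even block */
static bool
tid_is_dead(const Tid *tid, void *state)
{
	(void) state;
	return tid->blkno % 2 == 0;
}

static int
bulk_delete(const Tid *index_tuples, int ntuples,
			BulkDeleteCallback callback, void *state)
{
	int			ndeleted = 0;

	for (int i = 0; i < ntuples; i++)
		if (callback(&index_tuples[i], state))
			ndeleted++;			/* the index AM would remove this entry */
	return ndeleted;
}

int
main(void)
{
	Tid			tuples[] = {{1, 1}, {2, 1}, {2, 2}, {3, 1}};

	printf("removed %d of 4\n", bulk_delete(tuples, 4, tid_is_dead, NULL));
	return 0;
}
```

Because the callback runs for every index tuple, its cost dominates
index cleanup, which is why the constant-time membership probe pays
off so visibly.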
