| author | Masahiko Sawada | 2024-04-02 01:15:37 +0000 |
|---|---|---|
| committer | Masahiko Sawada | 2024-04-02 01:15:37 +0000 |
| commit | 667e65aac354975c6f8090c6146fceb8d7b762d6 | |
| tree | ecefa5c922788f32aa643e4639561d7ba9ac1801 /src/backend/commands/vacuum.c | |
| parent | d5d2205c8ddc6670fa87474e172fdfab162b7a73 | |
Use TidStore for dead tuple TIDs storage during lazy vacuum.
Previously, we used a simple array for storing dead tuple IDs during
lazy vacuum, which had a number of problems:
* The array used a single allocation and so was limited to 1GB.
* The allocation was pessimistically sized according to table size
  (see the sketch after this list).
* Lookup with binary search was slow because of poor CPU cache and
branch prediction behavior.
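
For scale, here is a back-of-the-envelope sketch (standalone C, not
PostgreSQL source) of the old sizing rule. The 291 tuples-per-page
figure assumes 8kB pages, and the real code also clamped the result
to maintenance_work_mem, a detail omitted here:

```c
/*
 * Sketch of the old dead-TID array sizing: assume every tuple on every
 * page could die, then cap the single allocation at ~1GB (MaxAllocSize).
 * Constants assume 8kB pages; the maintenance_work_mem clamp is omitted.
 */
#include <stdio.h>
#include <stdint.h>

#define TID_BYTES 6                          /* sizeof(ItemPointerData) */
#define MAX_HEAP_TUPLES_PER_PAGE 291         /* 8kB pages, default build */
#define MAX_ALLOC (1024UL * 1024 * 1024 - 1) /* MaxAllocSize, ~1GB */

int
main(void)
{
	uint64_t	nblocks = 1000000;	/* an ~8GB table at 8kB per page */

	/* pessimistic: every tuple on every page might be dead */
	uint64_t	max_items = nblocks * MAX_HEAP_TUPLES_PER_PAGE;
	uint64_t	bytes = max_items * TID_BYTES;

	if (bytes > MAX_ALLOC)
		bytes = MAX_ALLOC;			/* single palloc capped at ~1GB */

	printf("pessimistic allocation: %.1f MB for %llu possible TIDs\n",
		   bytes / (1024.0 * 1024.0), (unsigned long long) max_items);
	return 0;
}
```

For this example table the array hits the 1GB ceiling even though far
fewer tuples are typically dead, which is the waste the TID store
removes.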
This commit replaces that array with the TID store from commit
30e144287a.
Since the backing radix tree makes small allocations as needed, the
1GB limit is now gone. Further, the total memory used is now often
smaller by an order of magnitude or more, depending on the
distribution of blocks and offsets. These two features should make
multiple rounds of heap scanning and index cleanup an extremely rare
event. TID lookup during index cleanup is also several times faster,
even more so when index order is correlated with heap tuple order.
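
To make that concrete, here is a minimal toy sketch in plain C (not
the real TidStore API): dead TIDs are keyed by block number, and the
offsets on each block collapse into one bitmap. The real store keeps
these bitmaps as values in a radix tree rather than the flat array
used below, but the behavior illustrated is the same: clustered dead
tuples share a single per-block entry, and membership is a direct
probe instead of a binary search over 6-byte TIDs.

```c
/*
 * Toy model of the TID store layout: block number -> bitmap of dead
 * offsets.  A flat array stands in for the radix tree to keep the
 * example self-contained and runnable.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NBLOCKS 1024				/* toy heap size, in blocks */

/* 5 * 64 = 320 bits, enough for 291 possible offsets per 8kB page */
static uint64_t offsets_bitmap[NBLOCKS][5];

static void
set_dead(uint32_t blkno, uint16_t offnum)
{
	offsets_bitmap[blkno][offnum / 64] |= UINT64_C(1) << (offnum % 64);
}

static bool
is_dead(uint32_t blkno, uint16_t offnum)
{
	return (offsets_bitmap[blkno][offnum / 64] >> (offnum % 64)) & 1;
}

int
main(void)
{
	/* 100 dead tuples on block 7 cost one bitmap, not 100 * 6 bytes */
	for (uint16_t off = 1; off <= 100; off++)
		set_dead(7, off);

	printf("block 7, offset 42 dead? %s\n", is_dead(7, 42) ? "yes" : "no");
	printf("block 8, offset 42 dead? %s\n", is_dead(8, 42) ? "yes" : "no");
	return 0;
}
```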
Since there is no longer a predictable relationship between the number
of dead tuples vacuumed and the space taken up by their TIDs, the
number of tuples no longer provides any meaningful insights for users,
nor is the maximum number predictable. For that reason this commit
also changes to byte-based progress reporting, with the relevant
columns of pg_stat_progress_vacuum renamed accordingly to
max_dead_tuple_bytes and dead_tuple_bytes.
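
The reporting side then amounts to sampling the store's footprint.
Below is a hedged sketch, assuming the PROGRESS_VACUUM_DEAD_TUPLE_BYTES
constant and the TidStoreMemoryUsage() call from the related commits;
it mirrors the plausible update path rather than quoting lazy vacuum
verbatim:

```c
/*
 * Hedged sketch, not a verbatim excerpt: after dead TIDs are added to
 * the store, report its actual memory consumption instead of a tuple
 * count.  Header paths and exact names are assumptions.
 */
#include "postgres.h"

#include "access/tidstore.h"
#include "commands/progress.h"
#include "utils/backend_progress.h"

static void
report_dead_tuple_bytes(TidStore *dead_items)
{
	/* surfaces as pg_stat_progress_vacuum.dead_tuple_bytes */
	pgstat_progress_update_param(PROGRESS_VACUUM_DEAD_TUPLE_BYTES,
								 TidStoreMemoryUsage(dead_items));
}
```

While a vacuum runs, the renamed columns can be watched with
`SELECT dead_tuple_bytes, max_dead_tuple_bytes FROM pg_stat_progress_vacuum;`.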
For parallel vacuum, both the TID store and supplemental information
specific to vacuum are shared among the parallel vacuum workers. As
with the previous array, we don't take any locks on TidStore during
parallel vacuum since writes are still only done by the leader
process.
Bump catalog version.
Reviewed-by: John Naylor, (in an earlier version) Dilip Kumar
Discussion: https://postgr.es/m/CAD21AoAfOZvmfR0j8VmZorZjL7RhTiQdVttNuC4W-Shdc2a-AA%40mail.gmail.com
Diffstat (limited to 'src/backend/commands/vacuum.c')
| -rw-r--r-- | src/backend/commands/vacuum.c | 78 |
1 file changed, 5 insertions, 73 deletions
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index e63c86cae45..b589279d49f 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -116,7 +116,6 @@ static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 static double compute_parallel_delay(void);
 static VacOptValue get_vacoptval_from_boolean(DefElem *def);
 static bool vac_tid_reaped(ItemPointer itemptr, void *state);
-static int vac_cmp_itemptr(const void *left, const void *right);
 
 /*
  * GUC check function to ensure GUC value specified is within the allowable
@@ -2489,16 +2488,16 @@ get_vacoptval_from_boolean(DefElem *def)
  */
 IndexBulkDeleteResult *
 vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
-                      VacDeadItems *dead_items)
+                      TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
 {
     /* Do bulk deletion */
     istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
                               (void *) dead_items);
 
     ereport(ivinfo->message_level,
-            (errmsg("scanned index \"%s\" to remove %d row versions",
+            (errmsg("scanned index \"%s\" to remove %lld row versions",
                     RelationGetRelationName(ivinfo->index),
-                    dead_items->num_items)));
+                    (long long) dead_items_info->num_items)));
 
     return istat;
 }
@@ -2530,81 +2529,14 @@ vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
 }
 
 /*
- * Returns the total required space for VACUUM's dead_items array given a
- * max_items value.
- */
-Size
-vac_max_items_to_alloc_size(int max_items)
-{
-    Assert(max_items <= MAXDEADITEMS(MaxAllocSize));
-
-    return offsetof(VacDeadItems, items) + sizeof(ItemPointerData) * max_items;
-}
-
-/*
  * vac_tid_reaped() -- is a particular tid deletable?
  *
  *     This has the right signature to be an IndexBulkDeleteCallback.
- *
- *     Assumes dead_items array is sorted (in ascending TID order).
  */
 static bool
 vac_tid_reaped(ItemPointer itemptr, void *state)
 {
-    VacDeadItems *dead_items = (VacDeadItems *) state;
-    int64       litem,
-                ritem,
-                item;
-    ItemPointer res;
-
-    litem = itemptr_encode(&dead_items->items[0]);
-    ritem = itemptr_encode(&dead_items->items[dead_items->num_items - 1]);
-    item = itemptr_encode(itemptr);
-
-    /*
-     * Doing a simple bound check before bsearch() is useful to avoid the
-     * extra cost of bsearch(), especially if dead items on the heap are
-     * concentrated in a certain range.  Since this function is called for
-     * every index tuple, it pays to be really fast.
-     */
-    if (item < litem || item > ritem)
-        return false;
-
-    res = (ItemPointer) bsearch(itemptr,
-                                dead_items->items,
-                                dead_items->num_items,
-                                sizeof(ItemPointerData),
-                                vac_cmp_itemptr);
-
-    return (res != NULL);
-}
-
-/*
- * Comparator routines for use with qsort() and bsearch().
- */
-static int
-vac_cmp_itemptr(const void *left, const void *right)
-{
-    BlockNumber lblk,
-                rblk;
-    OffsetNumber loff,
-                 roff;
-
-    lblk = ItemPointerGetBlockNumber((ItemPointer) left);
-    rblk = ItemPointerGetBlockNumber((ItemPointer) right);
-
-    if (lblk < rblk)
-        return -1;
-    if (lblk > rblk)
-        return 1;
-
-    loff = ItemPointerGetOffsetNumber((ItemPointer) left);
-    roff = ItemPointerGetOffsetNumber((ItemPointer) right);
-
-    if (loff < roff)
-        return -1;
-    if (loff > roff)
-        return 1;
+    TidStore   *dead_items = (TidStore *) state;
 
-    return 0;
+    return TidStoreIsMember(dead_items, itemptr);
 }
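
As a closing illustration of why the one-line body above suffices:
index_bulk_delete() drives a callback of exactly this shape once per
index tuple. The sketch below is plain C with stand-in types, not an
excerpt from any index AM, but it shows the calling pattern in which
vac_tid_reaped() now answers each probe with a single set-membership
test instead of a bsearch():

```c
/*
 * Minimal illustration of the IndexBulkDeleteCallback pattern: the
 * index AM walks its tuples and asks the callback whether each heap
 * TID is dead.  Types and the "dead" rule are stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct { unsigned blkno; unsigned offnum; } Tid;

/* mirrors IndexBulkDeleteCallback: (tid, opaque state) -> deletable? */
typedef bool (*BulkDeleteCallback) (const Tid *tid, void *state);

/* stand-in for TidStoreIsMember(): here, "dead" means an even block */
static bool
tid_is_dead(const Tid *tid, void *state)
{
	(void) state;
	return tid->blkno % 2 == 0;
}

static int
bulk_delete(const Tid *index_tuples, int ntuples,
			BulkDeleteCallback callback, void *state)
{
	int			ndeleted = 0;

	for (int i = 0; i < ntuples; i++)
		if (callback(&index_tuples[i], state))
			ndeleted++;			/* the index AM would remove this entry */
	return ndeleted;
}

int
main(void)
{
	Tid			tuples[] = {{1, 1}, {2, 1}, {2, 2}, {3, 1}};

	printf("removed %d of 4\n", bulk_delete(tuples, 4, tid_is_dead, NULL));
	return 0;
}
```

Because the callback runs for every index tuple, its cost dominates
index cleanup, which is why the constant-time membership probe pays
off so visibly.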
