From 72b3c092aaa39f6018c1191ad184bdecec4487de Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Thu, 16 Jan 2025 16:31:55 -0500 Subject: [PATCH] Trigger more frequent autovacuums with relallfrozen Calculate the insert threshold for triggering an autovacuum of a relation based on the number of unfrozen pages. By only considering the "active" (unfrozen) portion of the table when calculating how many tuples to add to the insert threshold, we can trigger more frequent vacuums of insert-heavy tables and increase the chances of vacuuming those pages when they still reside in shared buffers. This uses the recently added (99f8f3fbbc8f) relallfrozen column of pg_class. Reviewed-by: Nathan Bossart Reviewed-by: Greg Sabino Mullane Discussion: https://postgr.es/m/flat/CAAKRu_aj-P7YyBz_cPNwztz6ohP%2BvWis%3Diz3YcomkB3NpYA--w%40mail.gmail.com --- doc/src/sgml/catalogs.sgml | 7 ++--- doc/src/sgml/config.sgml | 15 +++++------ src/backend/postmaster/autovacuum.c | 27 ++++++++++++++++--- src/backend/utils/misc/postgresql.conf.sample | 4 +-- 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 9a21a0d6f157..a3447ad7d4b3 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -2072,9 +2072,10 @@ SCRAM-SHA-256$<iteration count>:&l Number of pages that are marked all-frozen in the table's visibility - map. This is only an estimate and can be used along with - relallvisible for scheduling vacuums and - tuning vacuum's freezing + map. This is only an estimate used for triggering autovacuums. It can + also be used along with relallvisible for + scheduling vacuums and tuning vacuum's freezing behavior. 
It is updated by diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index e55700f35b89..d2fa5f7d1a9f 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8773,14 +8773,13 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; - Specifies a fraction of the table size to add to - autovacuum_vacuum_insert_threshold - when deciding whether to trigger a VACUUM. - The default is 0.2 (20% of table size). - This parameter can only be set in the postgresql.conf - file or on the server command line; - but the setting can be overridden for individual tables by - changing table storage parameters. + Specifies a fraction of the unfrozen pages in the table to add to + autovacuum_vacuum_insert_threshold when deciding + whether to trigger a VACUUM. The default is + 0.2 (20% of unfrozen pages in table). This + parameter can only be set in the postgresql.conf + file or on the server command line; but the setting can be overridden + for individual tables by changing table storage parameters. 
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index ddb303f5201b..0aca7d78b90c 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -2938,7 +2938,6 @@ relation_needs_vacanalyze(Oid relid, { bool force_vacuum; bool av_enabled; - float4 reltuples; /* pg_class.reltuples */ /* constants from reloptions or GUC variables */ int vac_base_thresh, @@ -3052,7 +3051,11 @@ relation_needs_vacanalyze(Oid relid, */ if (PointerIsValid(tabentry) && AutoVacuumingActive()) { - reltuples = classForm->reltuples; + float4 pcnt_unfrozen = 1; + float4 reltuples = classForm->reltuples; + int32 relpages = classForm->relpages; + int32 relallfrozen = classForm->relallfrozen; + vactuples = tabentry->dead_tuples; instuples = tabentry->ins_since_vacuum; anltuples = tabentry->mod_since_analyze; @@ -3061,11 +3064,29 @@ relation_needs_vacanalyze(Oid relid, if (reltuples < 0) reltuples = 0; + /* + * If we have data for relallfrozen, calculate the unfrozen percentage + * of the table to modify insert scale factor. This helps us decide + * whether or not to vacuum an insert-heavy table based on the number + * of inserts to the "active" part of the table. + */ + if (relpages > 0 && relallfrozen > 0) + { + /* + * It could be the stats were updated manually and relallfrozen > + * relpages. Clamp relallfrozen to relpages to avoid nonsensical + * calculations. 
+ */ + relallfrozen = Min(relallfrozen, relpages); + pcnt_unfrozen = 1 - ((float4) relallfrozen / relpages); + } + vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples; if (vac_max_thresh >= 0 && vacthresh > (float4) vac_max_thresh) vacthresh = (float4) vac_max_thresh; - vacinsthresh = (float4) vac_ins_base_thresh + vac_ins_scale_factor * reltuples; + vacinsthresh = (float4) vac_ins_base_thresh + + vac_ins_scale_factor * reltuples * pcnt_unfrozen; anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples; /* diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 5362ff805195..8ef9b1a25253 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -675,8 +675,8 @@ autovacuum_worker_slots = 16 # autovacuum worker slots to allocate #autovacuum_analyze_threshold = 50 # min number of row updates before # analyze #autovacuum_vacuum_scale_factor = 0.2 # fraction of table size before vacuum -#autovacuum_vacuum_insert_scale_factor = 0.2 # fraction of inserts over table - # size before insert vacuum +#autovacuum_vacuum_insert_scale_factor = 0.2 # fraction of unfrozen pages + # inserted to before insert vacuum #autovacuum_analyze_scale_factor = 0.1 # fraction of table size before analyze #autovacuum_vacuum_max_threshold = 100000000 # max number of row updates # before vacuum; -1 disables max