Skip to content

Commit 2ac93a8

Browse files
nathan-bossart authored and Commitfest Bot committed
pg_dump: Retrieve attribute statistics in batches.
Currently, pg_dump gathers attribute statistics with a query per relation, which can cause pg_dump to take significantly longer, especially when there are many tables. This commit improves matters by gathering attribute statistics for 64 relations at a time. Some simple testing showed this was the ideal batch size, but performance may vary depending on workload. This change increases the memory usage of pg_dump a bit, but that isn't expected to be too egregious and is arguably well worth the trade-off. Our lookahead code for determining the next batch of relations for which to gather attribute statistics is simple: we walk the TOC sequentially looking for eligible entries. However, the assumption that we will dump all such entries in this order doesn't hold up for dump formats that use RestoreArchive(). This is because RestoreArchive() does multiple passes through the TOC and selectively dumps certain entries each time. This is particularly troublesome for index stats and a subset of matview stats; both are in SECTION_POST_DATA, but matview stats that depend on matview data are dumped in RESTORE_PASS_POST_ACL, while all other statistics data is dumped in RESTORE_PASS_MAIN. To deal with this, this commit moves all statistics data entries in SECTION_POST_DATA to RESTORE_PASS_POST_ACL, which ensures that we always dump statistics data entries in TOC order. One convenient side effect of this change is that we can revert a decent chunk of commit a0a4601. Author: Corey Huinker <[email protected]> Co-authored-by: Nathan Bossart <[email protected]> Reviewed-by: Jeff Davis <[email protected]> Discussion: https://postgr.es/m/CADkLM%3Dc%2Br05srPy9w%2B-%2BnbmLEo15dKXYQ03Q_xyK%2BriJerigLQ%40mail.gmail.com
1 parent 6a824d9 commit 2ac93a8

File tree

3 files changed

+155
-52
lines changed

3 files changed

+155
-52
lines changed

src/bin/pg_dump/pg_backup.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,10 @@ typedef int DumpId;
285285
* Function pointer prototypes for assorted callback methods.
286286
*/
287287

288-
typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg);
288+
/* forward declaration to avoid including pg_backup_archiver.h here */
289+
typedef struct _tocEntry TocEntry;
290+
291+
typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg, const TocEntry *te);
289292
typedef int (*DataDumperPtr) (Archive *AH, const void *userArg);
290293

291294
typedef void (*SetupWorkerPtrType) (Archive *AH);

src/bin/pg_dump/pg_backup_archiver.c

+24-32
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ static void processEncodingEntry(ArchiveHandle *AH, TocEntry *te);
7272
static void processStdStringsEntry(ArchiveHandle *AH, TocEntry *te);
7373
static void processSearchPathEntry(ArchiveHandle *AH, TocEntry *te);
7474
static int _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH);
75-
static RestorePass _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te);
75+
static RestorePass _tocEntryRestorePass(TocEntry *te);
7676
static bool _tocEntryIsACL(TocEntry *te);
7777
static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
7878
static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
@@ -102,8 +102,7 @@ static void pending_list_append(TocEntry *l, TocEntry *te);
102102
static void pending_list_remove(TocEntry *te);
103103
static int TocEntrySizeCompareQsort(const void *p1, const void *p2);
104104
static int TocEntrySizeCompareBinaryheap(void *p1, void *p2, void *arg);
105-
static void move_to_ready_heap(ArchiveHandle *AH,
106-
TocEntry *pending_list,
105+
static void move_to_ready_heap(TocEntry *pending_list,
107106
binaryheap *ready_heap,
108107
RestorePass pass);
109108
static TocEntry *pop_next_work_item(binaryheap *ready_heap,
@@ -749,7 +748,7 @@ RestoreArchive(Archive *AHX)
749748
if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) == 0)
750749
continue; /* ignore if not to be dumped at all */
751750

752-
switch (_tocEntryRestorePass(AH, te))
751+
switch (_tocEntryRestorePass(te))
753752
{
754753
case RESTORE_PASS_MAIN:
755754
(void) restore_toc_entry(AH, te, false);
@@ -768,7 +767,7 @@ RestoreArchive(Archive *AHX)
768767
for (te = AH->toc->next; te != AH->toc; te = te->next)
769768
{
770769
if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 &&
771-
_tocEntryRestorePass(AH, te) == RESTORE_PASS_ACL)
770+
_tocEntryRestorePass(te) == RESTORE_PASS_ACL)
772771
(void) restore_toc_entry(AH, te, false);
773772
}
774773
}
@@ -778,7 +777,7 @@ RestoreArchive(Archive *AHX)
778777
for (te = AH->toc->next; te != AH->toc; te = te->next)
779778
{
780779
if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 &&
781-
_tocEntryRestorePass(AH, te) == RESTORE_PASS_POST_ACL)
780+
_tocEntryRestorePass(te) == RESTORE_PASS_POST_ACL)
782781
(void) restore_toc_entry(AH, te, false);
783782
}
784783
}
@@ -2650,7 +2649,7 @@ WriteToc(ArchiveHandle *AH)
26502649
}
26512650
else if (te->defnDumper)
26522651
{
2653-
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
2652+
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te);
26542653

26552654
te->defnLen = WriteStr(AH, defn);
26562655
pg_free(defn);
@@ -3256,7 +3255,7 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
32563255
* See notes with the RestorePass typedef in pg_backup_archiver.h.
32573256
*/
32583257
static RestorePass
3259-
_tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te)
3258+
_tocEntryRestorePass(TocEntry *te)
32603259
{
32613260
/* "ACL LANGUAGE" was a crock emitted only in PG 7.4 */
32623261
if (strcmp(te->desc, "ACL") == 0 ||
@@ -3279,23 +3278,17 @@ _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te)
32793278

32803279
/*
32813280
* If statistics data is dependent on materialized view data, it must be
3282-
* deferred to RESTORE_PASS_POST_ACL.
3281+
* deferred to RESTORE_PASS_POST_ACL. Those entries are marked with
3282+
* SECTION_POST_DATA already, and some other stats entries (e.g., stats
3283+
* for indexes) will also be marked SECTION_POST_DATA. Furthermore, our
3284+
* lookahead code in fetchAttributeStats() assumes we dump all statistics
3285+
* data entries in TOC order. To ensure this assumption holds, we move
3286+
* all statistics data entries in SECTION_POST_DATA to
3287+
* RESTORE_PASS_POST_ACL.
32833288
*/
3284-
if (strcmp(te->desc, "STATISTICS DATA") == 0)
3285-
{
3286-
for (int i = 0; i < te->nDeps; i++)
3287-
{
3288-
DumpId depid = te->dependencies[i];
3289-
3290-
if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL)
3291-
{
3292-
TocEntry *otherte = AH->tocsByDumpId[depid];
3293-
3294-
if (strcmp(otherte->desc, "MATERIALIZED VIEW DATA") == 0)
3295-
return RESTORE_PASS_POST_ACL;
3296-
}
3297-
}
3298-
}
3289+
if (strcmp(te->desc, "STATISTICS DATA") == 0 &&
3290+
te->section == SECTION_POST_DATA)
3291+
return RESTORE_PASS_POST_ACL;
32993292

33003293
/* All else can be handled in the main pass. */
33013294
return RESTORE_PASS_MAIN;
@@ -3945,7 +3938,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
39453938
}
39463939
else if (te->defnDumper)
39473940
{
3948-
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
3941+
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te);
39493942

39503943
te->defnLen = ahprintf(AH, "%s\n\n", defn);
39513944
pg_free(defn);
@@ -4343,7 +4336,7 @@ restore_toc_entries_prefork(ArchiveHandle *AH, TocEntry *pending_list)
43434336
* not set skipped_some in this case, since by assumption no main-pass
43444337
* items could depend on these.
43454338
*/
4346-
if (_tocEntryRestorePass(AH, next_work_item) != RESTORE_PASS_MAIN)
4339+
if (_tocEntryRestorePass(next_work_item) != RESTORE_PASS_MAIN)
43474340
do_now = false;
43484341

43494342
if (do_now)
@@ -4425,7 +4418,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
44254418
* process in the current restore pass.
44264419
*/
44274420
AH->restorePass = RESTORE_PASS_MAIN;
4428-
move_to_ready_heap(AH, pending_list, ready_heap, AH->restorePass);
4421+
move_to_ready_heap(pending_list, ready_heap, AH->restorePass);
44294422

44304423
/*
44314424
* main parent loop
@@ -4474,7 +4467,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
44744467
/* Advance to next restore pass */
44754468
AH->restorePass++;
44764469
/* That probably allows some stuff to be made ready */
4477-
move_to_ready_heap(AH, pending_list, ready_heap, AH->restorePass);
4470+
move_to_ready_heap(pending_list, ready_heap, AH->restorePass);
44784471
/* Loop around to see if anything's now ready */
44794472
continue;
44804473
}
@@ -4645,8 +4638,7 @@ TocEntrySizeCompareBinaryheap(void *p1, void *p2, void *arg)
46454638
* which applies the same logic one-at-a-time.)
46464639
*/
46474640
static void
4648-
move_to_ready_heap(ArchiveHandle *AH,
4649-
TocEntry *pending_list,
4641+
move_to_ready_heap(TocEntry *pending_list,
46504642
binaryheap *ready_heap,
46514643
RestorePass pass)
46524644
{
@@ -4659,7 +4651,7 @@ move_to_ready_heap(ArchiveHandle *AH,
46594651
next_te = te->pending_next;
46604652

46614653
if (te->depCount == 0 &&
4662-
_tocEntryRestorePass(AH, te) == pass)
4654+
_tocEntryRestorePass(te) == pass)
46634655
{
46644656
/* Remove it from pending_list ... */
46654657
pending_list_remove(te);
@@ -5053,7 +5045,7 @@ reduce_dependencies(ArchiveHandle *AH, TocEntry *te,
50535045
* memberships changed.
50545046
*/
50555047
if (otherte->depCount == 0 &&
5056-
_tocEntryRestorePass(AH, otherte) == AH->restorePass &&
5048+
_tocEntryRestorePass(otherte) == AH->restorePass &&
50575049
otherte->pending_prev != NULL &&
50585050
ready_heap != NULL)
50595051
{

src/bin/pg_dump/pg_dump.c

+127-19
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,9 @@ static int nbinaryUpgradeClassOids = 0;
209209
static SequenceItem *sequences = NULL;
210210
static int nsequences = 0;
211211

212+
/* Maximum number of relations to fetch in a fetchAttributeStats() call. */
213+
#define MAX_ATTR_STATS_RELS 64
214+
212215
/*
213216
* The default number of rows per INSERT when
214217
* --inserts is specified without --rows-per-insert
@@ -10553,6 +10556,78 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
1055310556
appendPQExpBuffer(out, "::%s", argtype);
1055410557
}
1055510558

10559+
/*
10560+
* fetchAttributeStats --
10561+
*
10562+
* Fetch next batch of rows for getAttributeStats().
10563+
*/
10564+
static PGresult *
10565+
fetchAttributeStats(Archive *fout)
10566+
{
10567+
ArchiveHandle *AH = (ArchiveHandle *) fout;
10568+
PQExpBuffer nspnames = createPQExpBuffer();
10569+
PQExpBuffer relnames = createPQExpBuffer();
10570+
int count = 0;
10571+
PGresult *res = NULL;
10572+
static TocEntry *te;
10573+
static bool restarted;
10574+
10575+
/* If we're just starting, set our TOC pointer. */
10576+
if (!te)
10577+
te = AH->toc->next;
10578+
10579+
/*
10580+
* We can't avoid a second TOC scan for the tar format because it writes
10581+
* restore.sql separately, which means we must execute all of our queries
10582+
* a second time. This feels risky, but there is no known reason it
10583+
* should generate different output than the first pass. Even if it does,
10584+
* the worst case is that restore.sql might have different statistics data
10585+
* than the archive.
10586+
*/
10587+
if (!restarted && te == AH->toc && AH->format == archTar)
10588+
{
10589+
te = AH->toc->next;
10590+
restarted = true;
10591+
}
10592+
10593+
/*
10594+
* Scan the TOC for the next set of relevant stats entries. We assume
10595+
* that statistics are dumped in the order they are listed in the TOC.
10596+
* This is perhaps not the sturdiest assumption, so we verify it matches
10597+
* reality in dumpRelationStats_dumper().
10598+
*/
10599+
for (; te != AH->toc && count < MAX_ATTR_STATS_RELS; te = te->next)
10600+
{
10601+
if (te->reqs && strcmp(te->desc, "STATISTICS DATA") == 0)
10602+
{
10603+
RelStatsInfo *rsinfo = (RelStatsInfo *) te->defnDumperArg;
10604+
10605+
appendPQExpBuffer(nspnames, "%s%s", count ? "," : "",
10606+
fmtId(rsinfo->dobj.namespace->dobj.name));
10607+
appendPQExpBuffer(relnames, "%s%s", count ? "," : "",
10608+
fmtId(rsinfo->dobj.name));
10609+
count++;
10610+
}
10611+
}
10612+
10613+
/* Execute the query for the next batch of relations. */
10614+
if (count > 0)
10615+
{
10616+
PQExpBuffer query = createPQExpBuffer();
10617+
10618+
appendPQExpBuffer(query, "EXECUTE getAttributeStats("
10619+
"'{%s}'::pg_catalog.name[],"
10620+
"'{%s}'::pg_catalog.name[])",
10621+
nspnames->data, relnames->data);
10622+
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
10623+
destroyPQExpBuffer(query);
10624+
}
10625+
10626+
destroyPQExpBuffer(nspnames);
10627+
destroyPQExpBuffer(relnames);
10628+
return res;
10629+
}
10630+
1055610631
/*
1055710632
* dumpRelationStats_dumper --
1055810633
*
@@ -10561,14 +10636,17 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
1056110636
* dumped.
1056210637
*/
1056310638
static char *
10564-
dumpRelationStats_dumper(Archive *fout, const void *userArg)
10639+
dumpRelationStats_dumper(Archive *fout, const void *userArg, const TocEntry *te)
1056510640
{
1056610641
const RelStatsInfo *rsinfo = (RelStatsInfo *) userArg;
1056710642
const DumpableObject *dobj = &rsinfo->dobj;
10568-
PGresult *res;
10643+
static PGresult *res;
10644+
static int rownum;
1056910645
PQExpBuffer query;
1057010646
PQExpBufferData out_data;
1057110647
PQExpBuffer out = &out_data;
10648+
int i_schemaname;
10649+
int i_tablename;
1057210650
int i_attname;
1057310651
int i_inherited;
1057410652
int i_null_frac;
@@ -10584,13 +10662,30 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1058410662
int i_range_length_histogram;
1058510663
int i_range_empty_frac;
1058610664
int i_range_bounds_histogram;
10665+
static TocEntry *next_te;
10666+
10667+
/*
10668+
* fetchAttributeStats() assumes that the statistics are dumped in the
10669+
* order they are listed in the TOC. We verify that here for safety.
10670+
*/
10671+
if (!next_te)
10672+
next_te = ((ArchiveHandle *) fout)->toc;
10673+
10674+
next_te = next_te->next;
10675+
while (!next_te->reqs || strcmp(next_te->desc, "STATISTICS DATA") != 0)
10676+
next_te = next_te->next;
10677+
10678+
if (te != next_te)
10679+
pg_fatal("stats dumped out of order (current: %d %s %s) (expected: %d %s %s)",
10680+
te->dumpId, te->desc, te->tag,
10681+
next_te->dumpId, next_te->desc, next_te->tag);
1058710682

1058810683
query = createPQExpBuffer();
1058910684
if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS])
1059010685
{
1059110686
appendPQExpBufferStr(query,
10592-
"PREPARE getAttributeStats(pg_catalog.name, pg_catalog.name) AS\n"
10593-
"SELECT s.attname, s.inherited, "
10687+
"PREPARE getAttributeStats(pg_catalog.name[], pg_catalog.name[]) AS\n"
10688+
"SELECT s.schemaname, s.tablename, s.attname, s.inherited, "
1059410689
"s.null_frac, s.avg_width, s.n_distinct, "
1059510690
"s.most_common_vals, s.most_common_freqs, "
1059610691
"s.histogram_bounds, s.correlation, "
@@ -10608,11 +10703,21 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1060810703
"NULL AS range_empty_frac,"
1060910704
"NULL AS range_bounds_histogram ");
1061010705

10706+
/*
10707+
* The results must be in the order of the relations supplied in the
10708+
* parameters to ensure we remain in sync as we walk through the TOC.
10709+
* The redundant filter clause on s.tablename = ANY(...) seems
10710+
* sufficient to convince the planner to use the
10711+
* pg_class_relname_nsp_index, which avoids a full scan of pg_stats.
10712+
* This may not work for all versions.
10713+
*/
1061110714
appendPQExpBufferStr(query,
1061210715
"FROM pg_catalog.pg_stats s "
10613-
"WHERE s.schemaname = $1 "
10614-
"AND s.tablename = $2 "
10615-
"ORDER BY s.attname, s.inherited");
10716+
"JOIN unnest($1, $2) WITH ORDINALITY AS u (schemaname, tablename, ord) "
10717+
"ON s.schemaname = u.schemaname "
10718+
"AND s.tablename = u.tablename "
10719+
"WHERE s.tablename = ANY($2) "
10720+
"ORDER BY u.ord, s.attname, s.inherited");
1061610721

1061710722
ExecuteSqlStatement(fout, query->data);
1061810723

@@ -10642,16 +10747,16 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1064210747

1064310748
appendPQExpBufferStr(out, "\n);\n");
1064410749

10750+
/* Fetch the next batch of attribute statistics if needed. */
10751+
if (rownum >= PQntuples(res))
10752+
{
10753+
PQclear(res);
10754+
res = fetchAttributeStats(fout);
10755+
rownum = 0;
10756+
}
1064510757

10646-
/* fetch attribute stats */
10647-
appendPQExpBufferStr(query, "EXECUTE getAttributeStats(");
10648-
appendStringLiteralAH(query, dobj->namespace->dobj.name, fout);
10649-
appendPQExpBufferStr(query, ", ");
10650-
appendStringLiteralAH(query, dobj->name, fout);
10651-
appendPQExpBufferStr(query, ");");
10652-
10653-
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
10654-
10758+
i_schemaname = PQfnumber(res, "schemaname");
10759+
i_tablename = PQfnumber(res, "tablename");
1065510760
i_attname = PQfnumber(res, "attname");
1065610761
i_inherited = PQfnumber(res, "inherited");
1065710762
i_null_frac = PQfnumber(res, "null_frac");
@@ -10669,10 +10774,15 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1066910774
i_range_bounds_histogram = PQfnumber(res, "range_bounds_histogram");
1067010775

1067110776
/* restore attribute stats */
10672-
for (int rownum = 0; rownum < PQntuples(res); rownum++)
10777+
for (; rownum < PQntuples(res); rownum++)
1067310778
{
1067410779
const char *attname;
1067510780

10781+
/* Stop if the next stat row in our cache isn't for this relation. */
10782+
if (strcmp(dobj->name, PQgetvalue(res, rownum, i_tablename)) != 0 ||
10783+
strcmp(dobj->namespace->dobj.name, PQgetvalue(res, rownum, i_schemaname)) != 0)
10784+
break;
10785+
1067610786
appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_attribute_stats(\n");
1067710787
appendPQExpBuffer(out, "\t'version', '%u'::integer,\n",
1067810788
fout->remoteVersion);
@@ -10762,8 +10872,6 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1076210872
appendPQExpBufferStr(out, "\n);\n");
1076310873
}
1076410874

10765-
PQclear(res);
10766-
1076710875
destroyPQExpBuffer(query);
1076810876
return out->data;
1076910877
}

0 commit comments

Comments
 (0)