diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index 658986de6f83..9005b4253b40 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -285,6 +285,10 @@ typedef int DumpId; * Function pointer prototypes for assorted callback methods. */ +/* forward declaration to avoid including pg_backup_archiver.h here */ +typedef struct _tocEntry TocEntry; + +typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg, const TocEntry *te); typedef int (*DataDumperPtr) (Archive *AH, const void *userArg); typedef void (*SetupWorkerPtrType) (Archive *AH); diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 1d131e5a57d4..7343867584a5 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -72,7 +72,7 @@ static void processEncodingEntry(ArchiveHandle *AH, TocEntry *te); static void processStdStringsEntry(ArchiveHandle *AH, TocEntry *te); static void processSearchPathEntry(ArchiveHandle *AH, TocEntry *te); static int _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH); -static RestorePass _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te); +static RestorePass _tocEntryRestorePass(TocEntry *te); static bool _tocEntryIsACL(TocEntry *te); static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te); static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te); @@ -102,8 +102,7 @@ static void pending_list_append(TocEntry *l, TocEntry *te); static void pending_list_remove(TocEntry *te); static int TocEntrySizeCompareQsort(const void *p1, const void *p2); static int TocEntrySizeCompareBinaryheap(void *p1, void *p2, void *arg); -static void move_to_ready_heap(ArchiveHandle *AH, - TocEntry *pending_list, +static void move_to_ready_heap(TocEntry *pending_list, binaryheap *ready_heap, RestorePass pass); static TocEntry *pop_next_work_item(binaryheap *ready_heap, @@ -749,7 +748,7 @@ RestoreArchive(Archive *AHX) if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) == 0) continue; /* ignore if not to be dumped at all */ - switch (_tocEntryRestorePass(AH, te)) + switch (_tocEntryRestorePass(te)) { case RESTORE_PASS_MAIN: (void) restore_toc_entry(AH, te, false); @@ -768,7 +767,7 @@ RestoreArchive(Archive *AHX) for (te = AH->toc->next; te != AH->toc; te = te->next) { if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 && - _tocEntryRestorePass(AH, te) == RESTORE_PASS_ACL) + _tocEntryRestorePass(te) == RESTORE_PASS_ACL) (void) restore_toc_entry(AH, te, false); } } @@ -778,7 +777,7 @@ RestoreArchive(Archive *AHX) for (te = AH->toc->next; te != AH->toc; te = te->next) { if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 && - _tocEntryRestorePass(AH, te) == RESTORE_PASS_POST_ACL) + _tocEntryRestorePass(te) == RESTORE_PASS_POST_ACL) (void) restore_toc_entry(AH, te, false); } } @@ -1266,6 +1265,9 @@ ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId, newToc->dataDumperArg = opts->dumpArg; newToc->hadDumper = opts->dumpFn ? true : false; + newToc->defnDumper = opts->defnFn; + newToc->defnDumperArg = opts->defnArg; + newToc->formatData = NULL; newToc->dataLength = 0; @@ -2621,7 +2623,40 @@ WriteToc(ArchiveHandle *AH) WriteStr(AH, te->tag); WriteStr(AH, te->desc); WriteInt(AH, te->section); - WriteStr(AH, te->defn); + + if (te->defnLen) + { + /* + * defnLen should only be set for custom format's second call to + * WriteToc(), which rewrites the TOC in place to update any data + * offsets. Rather than call the defnDumper a second time (which + * would involve executing the queries again), just skip writing + * the entry. While regenerating the definition should in theory + * produce the same result as before, it's expensive and feels + * risky. + * + * The custom format only does a second WriteToc() if fseeko() is + * usable (see _CloseArchive() in pg_backup_custom.c), so we can + * just use it without checking. For other formats, we fail + * because this assumption must no longer hold true. + */ + if (AH->format != archCustom) + pg_fatal("unexpected TOC entry in WriteToc(): %d %s %s", + te->dumpId, te->desc, te->tag); + + if (fseeko(AH->FH, te->defnLen, SEEK_CUR != 0)) + pg_fatal("error during file seek: %m"); + } + else if (te->defnDumper) + { + char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te); + + te->defnLen = WriteStr(AH, defn); + pg_free(defn); + } + else + WriteStr(AH, te->defn); + WriteStr(AH, te->dropStmt); WriteStr(AH, te->copyStmt); WriteStr(AH, te->namespace); @@ -3220,7 +3255,7 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) * See notes with the RestorePass typedef in pg_backup_archiver.h. */ static RestorePass -_tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te) +_tocEntryRestorePass(TocEntry *te) { /* "ACL LANGUAGE" was a crock emitted only in PG 7.4 */ if (strcmp(te->desc, "ACL") == 0 || @@ -3243,23 +3278,17 @@ _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te) /* * If statistics data is dependent on materialized view data, it must be - * deferred to RESTORE_PASS_POST_ACL. + * deferred to RESTORE_PASS_POST_ACL. Those entries are marked with + * SECTION_POST_DATA already, and some other stats entries (e.g., stats + * for indexes) will also be marked SECTION_POST_DATA. Furthermore, our + * lookahead code in fetchAttributeStats() assumes we dump all statistics + * data entries in TOC order. To ensure this assumption holds, we move + * all statistics data entries in SECTION_POST_DATA to + * RESTORE_PASS_POST_ACL. */ - if (strcmp(te->desc, "STATISTICS DATA") == 0) - { - for (int i = 0; i < te->nDeps; i++) - { - DumpId depid = te->dependencies[i]; - - if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL) - { - TocEntry *otherte = AH->tocsByDumpId[depid]; - - if (strcmp(otherte->desc, "MATERIALIZED VIEW DATA") == 0) - return RESTORE_PASS_POST_ACL; - } - } - } + if (strcmp(te->desc, "STATISTICS DATA") == 0 && + te->section == SECTION_POST_DATA) + return RESTORE_PASS_POST_ACL; /* All else can be handled in the main pass. */ return RESTORE_PASS_MAIN; @@ -3849,7 +3878,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) /* * Actually print the definition. Normally we can just print the defn - * string if any, but we have three special cases: + * string if any, but we have four special cases: * * 1. A crude hack for suppressing AUTHORIZATION clause that old pg_dump * versions put into CREATE SCHEMA. Don't mutate the variant for schema @@ -3862,6 +3891,10 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) * 3. ACL LARGE OBJECTS entries need special processing because they * contain only one copy of the ACL GRANT/REVOKE commands, which we must * apply to each large object listed in the associated BLOB METADATA. + * + * 4. Entries with a defnDumper need to call it to generate the + * definition. This is primarily intended to provide a way to save memory + * for objects that need a lot of it (e.g., statistics data). */ if (ropt->noOwner && strcmp(te->desc, "SCHEMA") == 0 && strncmp(te->defn, "--", 2) != 0) @@ -3877,6 +3910,39 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) { IssueACLPerBlob(AH, te); } + else if (te->defnLen && AH->format != archTar) + { + /* + * If defnLen is set, the defnDumper has already been called for this + * TOC entry. We ordinarily don't expect a defnDumper to be called on + * a TOC entry a second time in _printTocEntry(), but of course + * there's an exception. The tar format first calls WriteToc(), which + * scans through the entire TOC, and then it later calls + * RestoreArchive() to generate restore.sql, which scans through the + * TOC again. There doesn't seem to be a good way to avoid calling the + * defnDumper again in that case without storing the definition in + * memory, which is what we're trying to avoid in the first place. + * This second defnDumper invocation should generate the exact same + * output, but even if it doesn't, the worst case is that the + * restore.sql file (which isn't used by pg_restore) is incorrect. + * Past discussion on the mailing list indicates that tar format isn't + * known to be heavily used and might be a candidate for deprecation, + * so it doesn't seem worth trying much harder here. + * + * In all other cases, encountering a TOC entry a second time in + * _printTocEntry() is unexpected, so we fail because one of our + * assumptions must no longer hold true. + */ + pg_fatal("unexpected TOC entry in _printTocEntry(): %d %s %s", + te->dumpId, te->desc, te->tag); + } + else if (te->defnDumper) + { + char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te); + + te->defnLen = ahprintf(AH, "%s\n\n", defn); + pg_free(defn); + } else if (te->defn && strlen(te->defn) > 0) { ahprintf(AH, "%s\n\n", te->defn); @@ -4270,7 +4336,7 @@ restore_toc_entries_prefork(ArchiveHandle *AH, TocEntry *pending_list) * not set skipped_some in this case, since by assumption no main-pass * items could depend on these. */ - if (_tocEntryRestorePass(AH, next_work_item) != RESTORE_PASS_MAIN) + if (_tocEntryRestorePass(next_work_item) != RESTORE_PASS_MAIN) do_now = false; if (do_now) @@ -4352,7 +4418,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate, * process in the current restore pass. */ AH->restorePass = RESTORE_PASS_MAIN; - move_to_ready_heap(AH, pending_list, ready_heap, AH->restorePass); + move_to_ready_heap(pending_list, ready_heap, AH->restorePass); /* * main parent loop @@ -4401,7 +4467,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate, /* Advance to next restore pass */ AH->restorePass++; /* That probably allows some stuff to be made ready */ - move_to_ready_heap(AH, pending_list, ready_heap, AH->restorePass); + move_to_ready_heap(pending_list, ready_heap, AH->restorePass); /* Loop around to see if anything's now ready */ continue; } @@ -4572,8 +4638,7 @@ TocEntrySizeCompareBinaryheap(void *p1, void *p2, void *arg) * which applies the same logic one-at-a-time.) */ static void -move_to_ready_heap(ArchiveHandle *AH, - TocEntry *pending_list, +move_to_ready_heap(TocEntry *pending_list, binaryheap *ready_heap, RestorePass pass) { @@ -4586,7 +4651,7 @@ move_to_ready_heap(ArchiveHandle *AH, next_te = te->pending_next; if (te->depCount == 0 && - _tocEntryRestorePass(AH, te) == pass) + _tocEntryRestorePass(te) == pass) { /* Remove it from pending_list ... */ pending_list_remove(te); @@ -4980,7 +5045,7 @@ reduce_dependencies(ArchiveHandle *AH, TocEntry *te, * memberships changed. */ if (otherte->depCount == 0 && - _tocEntryRestorePass(AH, otherte) == AH->restorePass && + _tocEntryRestorePass(otherte) == AH->restorePass && otherte->pending_prev != NULL && ready_heap != NULL) { diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index a2064f471ede..b7ebc2b39cd1 100644 --- a/src/bin/pg_dump/pg_backup_archiver.h +++ b/src/bin/pg_dump/pg_backup_archiver.h @@ -368,6 +368,10 @@ struct _tocEntry const void *dataDumperArg; /* Arg for above routine */ void *formatData; /* TOC Entry data specific to file format */ + DefnDumperPtr defnDumper; /* routine to dump definition statement */ + const void *defnDumperArg; /* arg for above routine */ + size_t defnLen; /* length of dumped definition */ + /* working state while dumping/restoring */ pgoff_t dataLength; /* item's data size; 0 if none or unknown */ int reqs; /* do we need schema and/or data of object @@ -407,6 +411,8 @@ typedef struct _archiveOpts int nDeps; DataDumperPtr dumpFn; const void *dumpArg; + DefnDumperPtr defnFn; + const void *defnArg; } ArchiveOpts; #define ARCHIVE_OPTS(...) &(ArchiveOpts){__VA_ARGS__} /* Called to add a TOC entry */ diff --git a/src/bin/pg_dump/pg_backup_custom.c b/src/bin/pg_dump/pg_backup_custom.c index e44b887eb29b..f7c3af56304c 100644 --- a/src/bin/pg_dump/pg_backup_custom.c +++ b/src/bin/pg_dump/pg_backup_custom.c @@ -755,9 +755,11 @@ _CloseArchive(ArchiveHandle *AH) * If possible, re-write the TOC in order to update the data offset * information. This is not essential, as pg_restore can cope in most * cases without it; but it can make pg_restore significantly faster - * in some situations (especially parallel restore). + * in some situations (especially parallel restore). We can skip this + * step if we're not dumping any data; there are no offsets to update + * in that case. */ - if (ctx->hasSeek && + if (ctx->hasSeek && AH->public.dopt->dumpData && fseeko(AH->FH, tpos, SEEK_SET) == 0) WriteToc(AH); } diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 04c87ba8854c..1b8303cd0ce9 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -209,6 +209,9 @@ static int nbinaryUpgradeClassOids = 0; static SequenceItem *sequences = NULL; static int nsequences = 0; +/* Maximum number of relations to fetch in a fetchAttributeStats() call. */ +#define MAX_ATTR_STATS_RELS 64 + /* * The default number of rows per INSERT when * --inserts is specified without --rows-per-insert @@ -10554,17 +10557,96 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname, } /* - * dumpRelationStats -- + * fetchAttributeStats -- * - * Dump command to import stats into the relation on the new database. + * Fetch next batch of rows for getAttributeStats(). */ -static void -dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) +static PGresult * +fetchAttributeStats(Archive *fout) { + ArchiveHandle *AH = (ArchiveHandle *) fout; + PQExpBuffer nspnames = createPQExpBuffer(); + PQExpBuffer relnames = createPQExpBuffer(); + int count = 0; + PGresult *res = NULL; + static TocEntry *te; + static bool restarted; + + /* If we're just starting, set our TOC pointer. */ + if (!te) + te = AH->toc->next; + + /* + * We can't avoid a second TOC scan for the tar format because it writes + * restore.sql separately, which means we must execute all of our queries + * a second time. This feels risky, but there is no known reason it + * should generate different output than the first pass. Even if it does, + * the worst case is that restore.sql might have different statistics data + * than the archive. + */ + if (!restarted && te == AH->toc && AH->format == archTar) + { + te = AH->toc->next; + restarted = true; + } + + /* + * Scan the TOC for the next set of relevant stats entries. We assume + * that statistics are dumped in the order they are listed in the TOC. + * This is perhaps not the sturdiest assumption, so we verify it matches + * reality in dumpRelationStats_dumper(). + */ + for (; te != AH->toc && count < MAX_ATTR_STATS_RELS; te = te->next) + { + if (te->reqs && strcmp(te->desc, "STATISTICS DATA") == 0) + { + RelStatsInfo *rsinfo = (RelStatsInfo *) te->defnDumperArg; + + appendPQExpBuffer(nspnames, "%s%s", count ? "," : "", + fmtId(rsinfo->dobj.namespace->dobj.name)); + appendPQExpBuffer(relnames, "%s%s", count ? "," : "", + fmtId(rsinfo->dobj.name)); + count++; + } + } + + /* Execute the query for the next batch of relations. */ + if (count > 0) + { + PQExpBuffer query = createPQExpBuffer(); + + appendPQExpBuffer(query, "EXECUTE getAttributeStats(" + "'{%s}'::pg_catalog.name[]," + "'{%s}'::pg_catalog.name[])", + nspnames->data, relnames->data); + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + destroyPQExpBuffer(query); + } + + destroyPQExpBuffer(nspnames); + destroyPQExpBuffer(relnames); + return res; +} + +/* + * dumpRelationStats_dumper -- + * + * Generate command to import stats into the relation on the new database. + * This routine is called by the Archiver when it wants the statistics to be + * dumped. + */ +static char * +dumpRelationStats_dumper(Archive *fout, const void *userArg, const TocEntry *te) +{ + const RelStatsInfo *rsinfo = (RelStatsInfo *) userArg; const DumpableObject *dobj = &rsinfo->dobj; - PGresult *res; + static PGresult *res; + static int rownum; PQExpBuffer query; - PQExpBuffer out; + PQExpBufferData out_data; + PQExpBuffer out = &out_data; + int i_schemaname; + int i_tablename; int i_attname; int i_inherited; int i_null_frac; @@ -10580,17 +10662,30 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) int i_range_length_histogram; int i_range_empty_frac; int i_range_bounds_histogram; + static TocEntry *next_te; - /* nothing to do if we are not dumping statistics */ - if (!fout->dopt->dumpStatistics) - return; + /* + * fetchAttributeStats() assumes that the statistics are dumped in the + * order they are listed in the TOC. We verify that here for safety. + */ + if (!next_te) + next_te = ((ArchiveHandle *) fout)->toc; + + next_te = next_te->next; + while (!next_te->reqs || strcmp(next_te->desc, "STATISTICS DATA") != 0) + next_te = next_te->next; + + if (te != next_te) + pg_fatal("stats dumped out of order (current: %d %s %s) (expected: %d %s %s)", + te->dumpId, te->desc, te->tag, + next_te->dumpId, next_te->desc, next_te->tag); query = createPQExpBuffer(); if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS]) { appendPQExpBufferStr(query, - "PREPARE getAttributeStats(pg_catalog.name, pg_catalog.name) AS\n" - "SELECT s.attname, s.inherited, " + "PREPARE getAttributeStats(pg_catalog.name[], pg_catalog.name[]) AS\n" + "SELECT s.schemaname, s.tablename, s.attname, s.inherited, " "s.null_frac, s.avg_width, s.n_distinct, " "s.most_common_vals, s.most_common_freqs, " "s.histogram_bounds, s.correlation, " @@ -10608,11 +10703,21 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) "NULL AS range_empty_frac," "NULL AS range_bounds_histogram "); + /* + * The results must be in the order of the relations supplied in the + * parameters to ensure we remain in sync as we walk through the TOC. + * The redundant filter clause on s.tablename = ANY(...) seems + * sufficient to convince the planner to use the + * pg_class_relname_nsp_index, which avoids an full scan of pg_stats. + * This may not work for all versions. + */ appendPQExpBufferStr(query, "FROM pg_catalog.pg_stats s " - "WHERE s.schemaname = $1 " - "AND s.tablename = $2 " - "ORDER BY s.attname, s.inherited"); + "JOIN unnest($1, $2) WITH ORDINALITY AS u (schemaname, tablename, ord) " + "ON s.schemaname = u.schemaname " + "AND s.tablename = u.tablename " + "WHERE s.tablename = ANY($2) " + "ORDER BY u.ord, s.attname, s.inherited"); ExecuteSqlStatement(fout, query->data); @@ -10620,7 +10725,7 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) resetPQExpBuffer(query); } - out = createPQExpBuffer(); + initPQExpBuffer(out); /* restore relation stats */ appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_relation_stats(\n"); @@ -10642,16 +10747,16 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) appendPQExpBufferStr(out, "\n);\n"); + /* Fetch the next batch of attribute statistics if needed. */ + if (rownum >= PQntuples(res)) + { + PQclear(res); + res = fetchAttributeStats(fout); + rownum = 0; + } - /* fetch attribute stats */ - appendPQExpBufferStr(query, "EXECUTE getAttributeStats("); - appendStringLiteralAH(query, dobj->namespace->dobj.name, fout); - appendPQExpBufferStr(query, ", "); - appendStringLiteralAH(query, dobj->name, fout); - appendPQExpBufferStr(query, ");"); - - res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); - + i_schemaname = PQfnumber(res, "schemaname"); + i_tablename = PQfnumber(res, "tablename"); i_attname = PQfnumber(res, "attname"); i_inherited = PQfnumber(res, "inherited"); i_null_frac = PQfnumber(res, "null_frac"); @@ -10669,10 +10774,15 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) i_range_bounds_histogram = PQfnumber(res, "range_bounds_histogram"); /* restore attribute stats */ - for (int rownum = 0; rownum < PQntuples(res); rownum++) + for (; rownum < PQntuples(res); rownum++) { const char *attname; + /* Stop if the next stat row in our cache isn't for this relation. */ + if (strcmp(dobj->name, PQgetvalue(res, rownum, i_tablename)) != 0 || + strcmp(dobj->namespace->dobj.name, PQgetvalue(res, rownum, i_schemaname)) != 0) + break; + appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_attribute_stats(\n"); appendPQExpBuffer(out, "\t'version', '%u'::integer,\n", fout->remoteVersion); @@ -10762,19 +10872,35 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) appendPQExpBufferStr(out, "\n);\n"); } - PQclear(res); + destroyPQExpBuffer(query); + return out->data; +} + +/* + * dumpRelationStats -- + * + * Make an ArchiveEntry for the relation statistics. The Archiver will take + * care of gathering the statistics and generating the restore commands when + * they are needed. + */ +static void +dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) +{ + const DumpableObject *dobj = &rsinfo->dobj; + + /* nothing to do if we are not dumping statistics */ + if (!fout->dopt->dumpStatistics) + return; ArchiveEntry(fout, nilCatalogId, createDumpId(), ARCHIVE_OPTS(.tag = dobj->name, .namespace = dobj->namespace->dobj.name, .description = "STATISTICS DATA", .section = rsinfo->section, - .createStmt = out->data, + .defnFn = dumpRelationStats_dumper, + .defnArg = rsinfo, .deps = dobj->dependencies, .nDeps = dobj->nDeps)); - - destroyPQExpBuffer(out); - destroyPQExpBuffer(query); } /*