diff options
| author | Tom Lane | 2025-10-16 16:50:18 +0000 |
|---|---|---|
| committer | Tom Lane | 2025-10-16 16:50:18 +0000 |
| commit | 66ec01dc41243d756896777aa66df149ac8fa31d (patch) | |
| tree | 242fa0ec2b1b75829b20e5b229e06d896c08d480 | |
| parent | 812221b204276b884d2b14ef56aabd9e1946be81 (diff) | |
Align the data block sizes of pg_dump's various compression modes.
After commit fe8192a95, compress_zstd.c tends to produce data block
sizes around 128K, and we don't really have any control over that
unless we want to overrule ZSTD_CStreamOutSize(). Which seems like
a bad idea. But let's try to align the other compression modes to
produce block sizes roughly comparable to that, so that pg_restore's
skip-data performance isn't enormously different for different modes.
gzip compression can be brought in line simply by setting
DEFAULT_IO_BUFFER_SIZE = 128K, which this patch does. That
increases some unrelated buffer sizes, but none of them seem
problematic for modern platforms.
lz4's idea of appropriate block size is highly nonlinear:
if we just increase DEFAULT_IO_BUFFER_SIZE then the output
blocks end up around 200K. I found that adjusting the slop
factor in LZ4State_compression_init was a not-too-ugly way
of bringing that number roughly into line.
With compress = none you get data blocks the same sizes as the
table rows, which seems potentially problematic for narrow tables.
Introduce a layer of buffering to make that case match the others.
Comments in compress_io.h and 002_pg_dump.pl suggest that if
we increase DEFAULT_IO_BUFFER_SIZE then we need to increase the
amount of data fed through the tests in order to improve coverage.
I've not done that here, leaving it for a separate patch.
Author: Tom Lane <[email protected]>
Discussion: https://postgr.es/m/[email protected]
| -rw-r--r-- | src/bin/pg_dump/compress_io.h | 4 | ||||
| -rw-r--r-- | src/bin/pg_dump/compress_lz4.c | 9 | ||||
| -rw-r--r-- | src/bin/pg_dump/compress_none.c | 64 | ||||
| -rw-r--r-- | src/tools/pgindent/typedefs.list | 1 |
4 files changed, 72 insertions, 6 deletions
diff --git a/src/bin/pg_dump/compress_io.h b/src/bin/pg_dump/compress_io.h index 25a7bf0904d..ae008585c89 100644 --- a/src/bin/pg_dump/compress_io.h +++ b/src/bin/pg_dump/compress_io.h @@ -22,9 +22,9 @@ * * When changing this value, it's necessary to check the relevant test cases * still exercise all the branches. This applies especially if the value is - * increased, in which case the overflow buffer may not be needed. + * increased, in which case some loops may not get iterated. */ -#define DEFAULT_IO_BUFFER_SIZE 4096 +#define DEFAULT_IO_BUFFER_SIZE (128 * 1024) extern char *supports_compression(const pg_compress_specification compression_spec); diff --git a/src/bin/pg_dump/compress_lz4.c b/src/bin/pg_dump/compress_lz4.c index b817a083d38..450afd4e2be 100644 --- a/src/bin/pg_dump/compress_lz4.c +++ b/src/bin/pg_dump/compress_lz4.c @@ -100,9 +100,14 @@ LZ4State_compression_init(LZ4State *state) state->buflen = LZ4F_compressBound(DEFAULT_IO_BUFFER_SIZE, &state->prefs); /* - * Then double it, to ensure we're not forced to flush every time. + * Add some slop to ensure we're not forced to flush every time. + * + * The present slop factor of 50% is chosen so that the typical output + * block size is about 128K when DEFAULT_IO_BUFFER_SIZE = 128K. We might + * need a different slop factor to maintain that equivalence if + * DEFAULT_IO_BUFFER_SIZE is changed dramatically. */ - state->buflen *= 2; + state->buflen += state->buflen / 2; /* * LZ4F_compressBegin requires a buffer that is greater or equal to diff --git a/src/bin/pg_dump/compress_none.c b/src/bin/pg_dump/compress_none.c index 4abb2e95abc..94c155a572d 100644 --- a/src/bin/pg_dump/compress_none.c +++ b/src/bin/pg_dump/compress_none.c @@ -23,6 +23,18 @@ */ /* + * We buffer outgoing data, just to ensure that data blocks written to the + * archive file are of reasonable size. The read side could use this struct, + * but there's no need because it does not retain data across calls. + */ +typedef struct NoneCompressorState +{ + char *buffer; /* buffer for unwritten data */ + size_t buflen; /* allocated size of buffer */ + size_t bufdata; /* amount of valid data currently in buffer */ +} NoneCompressorState; + +/* * Private routines */ @@ -49,13 +61,45 @@ static void WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs, const void *data, size_t dLen) { - cs->writeF(AH, data, dLen); + NoneCompressorState *nonecs = (NoneCompressorState *) cs->private_data; + size_t remaining = dLen; + + while (remaining > 0) + { + size_t chunk; + + /* Dump buffer if full */ + if (nonecs->bufdata >= nonecs->buflen) + { + cs->writeF(AH, nonecs->buffer, nonecs->bufdata); + nonecs->bufdata = 0; + } + /* And fill it */ + chunk = nonecs->buflen - nonecs->bufdata; + if (chunk > remaining) + chunk = remaining; + memcpy(nonecs->buffer + nonecs->bufdata, data, chunk); + nonecs->bufdata += chunk; + data = ((const char *) data) + chunk; + remaining -= chunk; + } } static void EndCompressorNone(ArchiveHandle *AH, CompressorState *cs) { - /* no op */ + NoneCompressorState *nonecs = (NoneCompressorState *) cs->private_data; + + if (nonecs) + { + /* Dump buffer if nonempty */ + if (nonecs->bufdata > 0) + cs->writeF(AH, nonecs->buffer, nonecs->bufdata); + /* Free working state */ + pg_free(nonecs->buffer); + pg_free(nonecs); + cs->private_data = NULL; + } } /* @@ -71,6 +115,22 @@ InitCompressorNone(CompressorState *cs, cs->end = EndCompressorNone; cs->compression_spec = compression_spec; + + /* + * If the caller has defined a write function, prepare the necessary + * buffer. + */ + if (cs->writeF) + { + NoneCompressorState *nonecs; + + nonecs = (NoneCompressorState *) pg_malloc(sizeof(NoneCompressorState)); + nonecs->buflen = DEFAULT_IO_BUFFER_SIZE; + nonecs->buffer = pg_malloc(nonecs->buflen); + nonecs->bufdata = 0; + + cs->private_data = nonecs; + } } diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index ee1cab6190f..377a7946585 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1758,6 +1758,7 @@ NextValueExpr Node NodeTag NonEmptyRange +NoneCompressorState Notification NotificationList NotifyStmt |
