summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tom Lane, 2025-10-16 16:50:18 +0000
committer: Tom Lane, 2025-10-16 16:50:18 +0000
commit: 66ec01dc41243d756896777aa66df149ac8fa31d (patch)
tree: 242fa0ec2b1b75829b20e5b229e06d896c08d480
parent: 812221b204276b884d2b14ef56aabd9e1946be81 (diff)
Align the data block sizes of pg_dump's various compression modes.

After commit fe8192a95, compress_zstd.c tends to produce data block sizes around 128K, and we don't really have any control over that unless we want to overrule ZSTD_CStreamOutSize(). Which seems like a bad idea. But let's try to align the other compression modes to produce block sizes roughly comparable to that, so that pg_restore's skip-data performance isn't enormously different for different modes.

gzip compression can be brought in line simply by setting DEFAULT_IO_BUFFER_SIZE = 128K, which this patch does. That increases some unrelated buffer sizes, but none of them seem problematic for modern platforms.

lz4's idea of appropriate block size is highly nonlinear: if we just increase DEFAULT_IO_BUFFER_SIZE then the output blocks end up around 200K. I found that adjusting the slop factor in LZ4State_compression_init was a not-too-ugly way of bringing that number roughly into line.

With compress = none you get data blocks the same sizes as the table rows, which seems potentially problematic for narrow tables. Introduce a layer of buffering to make that case match the others.

Comments in compress_io.h and 002_pg_dump.pl suggest that if we increase DEFAULT_IO_BUFFER_SIZE then we need to increase the amount of data fed through the tests in order to improve coverage. I've not done that here, leaving it for a separate patch.

Author: Tom Lane <[email protected]>
Discussion: https://postgr.es/m/[email protected]

-rw-r--r--  src/bin/pg_dump/compress_io.h     4
-rw-r--r--  src/bin/pg_dump/compress_lz4.c    9
-rw-r--r--  src/bin/pg_dump/compress_none.c   64
-rw-r--r--  src/tools/pgindent/typedefs.list  1
4 files changed, 72 insertions, 6 deletions
diff --git a/src/bin/pg_dump/compress_io.h b/src/bin/pg_dump/compress_io.h
index 25a7bf0904d..ae008585c89 100644
--- a/src/bin/pg_dump/compress_io.h
+++ b/src/bin/pg_dump/compress_io.h
@@ -22,9 +22,9 @@
*
* When changing this value, it's necessary to check the relevant test cases
* still exercise all the branches. This applies especially if the value is
- * increased, in which case the overflow buffer may not be needed.
+ * increased, in which case some loops may not get iterated.
*/
-#define DEFAULT_IO_BUFFER_SIZE 4096
+#define DEFAULT_IO_BUFFER_SIZE (128 * 1024)
extern char *supports_compression(const pg_compress_specification compression_spec);
diff --git a/src/bin/pg_dump/compress_lz4.c b/src/bin/pg_dump/compress_lz4.c
index b817a083d38..450afd4e2be 100644
--- a/src/bin/pg_dump/compress_lz4.c
+++ b/src/bin/pg_dump/compress_lz4.c
@@ -100,9 +100,14 @@ LZ4State_compression_init(LZ4State *state)
state->buflen = LZ4F_compressBound(DEFAULT_IO_BUFFER_SIZE, &state->prefs);
/*
- * Then double it, to ensure we're not forced to flush every time.
+ * Add some slop to ensure we're not forced to flush every time.
+ *
+ * The present slop factor of 50% is chosen so that the typical output
+ * block size is about 128K when DEFAULT_IO_BUFFER_SIZE = 128K. We might
+ * need a different slop factor to maintain that equivalence if
+ * DEFAULT_IO_BUFFER_SIZE is changed dramatically.
*/
- state->buflen *= 2;
+ state->buflen += state->buflen / 2;
/*
* LZ4F_compressBegin requires a buffer that is greater or equal to
diff --git a/src/bin/pg_dump/compress_none.c b/src/bin/pg_dump/compress_none.c
index 4abb2e95abc..94c155a572d 100644
--- a/src/bin/pg_dump/compress_none.c
+++ b/src/bin/pg_dump/compress_none.c
@@ -23,6 +23,18 @@
*/
/*
+ * We buffer outgoing data, just to ensure that data blocks written to the
+ * archive file are of reasonable size. The read side could use this struct,
+ * but there's no need because it does not retain data across calls.
+ */
+typedef struct NoneCompressorState
+{
+ char *buffer; /* buffer for unwritten data */
+ size_t buflen; /* allocated size of buffer */
+ size_t bufdata; /* amount of valid data currently in buffer */
+} NoneCompressorState;
+
+/*
* Private routines
*/
@@ -49,13 +61,45 @@ static void
WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
const void *data, size_t dLen)
{
- cs->writeF(AH, data, dLen);
+ NoneCompressorState *nonecs = (NoneCompressorState *) cs->private_data;
+ size_t remaining = dLen;
+
+ while (remaining > 0)
+ {
+ size_t chunk;
+
+ /* Dump buffer if full */
+ if (nonecs->bufdata >= nonecs->buflen)
+ {
+ cs->writeF(AH, nonecs->buffer, nonecs->bufdata);
+ nonecs->bufdata = 0;
+ }
+ /* And fill it */
+ chunk = nonecs->buflen - nonecs->bufdata;
+ if (chunk > remaining)
+ chunk = remaining;
+ memcpy(nonecs->buffer + nonecs->bufdata, data, chunk);
+ nonecs->bufdata += chunk;
+ data = ((const char *) data) + chunk;
+ remaining -= chunk;
+ }
}
static void
EndCompressorNone(ArchiveHandle *AH, CompressorState *cs)
{
- /* no op */
+ NoneCompressorState *nonecs = (NoneCompressorState *) cs->private_data;
+
+ if (nonecs)
+ {
+ /* Dump buffer if nonempty */
+ if (nonecs->bufdata > 0)
+ cs->writeF(AH, nonecs->buffer, nonecs->bufdata);
+ /* Free working state */
+ pg_free(nonecs->buffer);
+ pg_free(nonecs);
+ cs->private_data = NULL;
+ }
}
/*
@@ -71,6 +115,22 @@ InitCompressorNone(CompressorState *cs,
cs->end = EndCompressorNone;
cs->compression_spec = compression_spec;
+
+ /*
+ * If the caller has defined a write function, prepare the necessary
+ * buffer.
+ */
+ if (cs->writeF)
+ {
+ NoneCompressorState *nonecs;
+
+ nonecs = (NoneCompressorState *) pg_malloc(sizeof(NoneCompressorState));
+ nonecs->buflen = DEFAULT_IO_BUFFER_SIZE;
+ nonecs->buffer = pg_malloc(nonecs->buflen);
+ nonecs->bufdata = 0;
+
+ cs->private_data = nonecs;
+ }
}
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index ee1cab6190f..377a7946585 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1758,6 +1758,7 @@ NextValueExpr
Node
NodeTag
NonEmptyRange
+NoneCompressorState
Notification
NotificationList
NotifyStmt