Skip to content

Commit 40f5048

Browse files
committedJul 18, 2022
Fix new conversion filter for UUEncode
This code (written by yours truly) was very broken on input strings long enough to require processing in multiple chunks. Fuzzing revealed this very quickly; after initial rework, further fuzzing also found a couple of very obscure bugs in corner cases.
1 parent 5fee30b commit 40f5048

File tree

2 files changed

+97
-17
lines changed

2 files changed

+97
-17
lines changed
 

‎ext/mbstring/libmbfl/filters/mbfilter_uuencode.c

+78-8
Original file line numberDiff line numberDiff line change
@@ -250,49 +250,119 @@ static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf,
250250
/* Every 3 bytes of input gets encoded as 4 bytes of output
251251
* Additionally, we have a 'length' byte and a newline for each line of output
252252
* (Maximum 45 input bytes can be encoded on a single output line)
253-
* Make space for two more bytes in case we start close to where a line must end */
254-
MB_CONVERT_BUF_ENSURE(buf, out, limit, ((len + 2) * 4 / 3) + (((len + 44) / 45) * 2) + (buf->state ? 0 : sizeof("begin 0644 filename\n")) + 2);
253+
* Make space for two more bytes in case we start close to where a line must end,
254+
* and another two if there are cached bits remaining from the previous call */
255+
MB_CONVERT_BUF_ENSURE(buf, out, limit, ((len + 2) * 4 / 3) + (((len + 44) / 45) * 2) + (buf->state ? 0 : sizeof("begin 0644 filename\n")) + 4);
255256

256-
unsigned int bytes_encoded = buf->state >> 1;
257+
unsigned int bytes_encoded = (buf->state >> 1) & 0x7F;
258+
/* UUEncode naturally wants to process input bytes in groups of 3, but
259+
* our buffer size may not be a multiple of 3
260+
* So there may be data from the previous call which we need to flush out */
261+
unsigned int n_cached_bits = (buf->state >> 8) & 0xFF;
262+
unsigned int cached_bits = buf->state >> 16;
257263

258264
if (!buf->state) {
259265
for (char *s = "begin 0644 filename\n"; *s; s++) {
260266
out = mb_convert_buf_add(out, *s);
261267
}
262268
out = mb_convert_buf_add(out, MIN(len, 45) + 32);
263269
buf->state |= 1;
270+
} else if (!len && end && !bytes_encoded && !n_cached_bits) {
271+
/* Corner case: under EXTREMELY rare circumstances, it's possible that the
272+
* final call to this conversion function will happen with an empty input
273+
* buffer, leaving an unwanted trailing len byte in the output buffer. */
274+
buf->out--;
275+
return;
276+
} else {
277+
/* UUEncode starts each line with a byte which indicates how many bytes
278+
* are encoded on the line
279+
* This can create a problem, since we receive the incoming data one buffer
280+
* at a time, and there is no requirement that the buffers should be aligned
281+
* with line boundaries
282+
* So if a previous line was cut off, we need to go back and fix up
283+
* the preceding len byte */
284+
unsigned char *len_byte = out - (bytes_encoded * 4 / 3) - 1;
285+
if (n_cached_bits) {
286+
len_byte -= (n_cached_bits == 2) ? 1 : 2;
287+
}
288+
*len_byte = MIN(bytes_encoded + len + (n_cached_bits ? (n_cached_bits == 2 ? 1 : 2) : 0), 45) + 32;
289+
290+
if (n_cached_bits) {
291+
/* Flush out bits which remained from previous call */
292+
if (n_cached_bits == 2) {
293+
uint32_t w = cached_bits;
294+
uint32_t w2 = 0, w3 = 0;
295+
if (len) {
296+
w2 = *in++;
297+
len--;
298+
}
299+
if (len) {
300+
w3 = *in++;
301+
len--;
302+
}
303+
out = mb_convert_buf_add3(out, uuencode_six_bits((w << 4) + ((w2 >> 4) & 0xF)), uuencode_six_bits(((w2 & 0xF) << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));
304+
} else {
305+
uint32_t w2 = cached_bits;
306+
uint32_t w3 = 0;
307+
if (len) {
308+
w3 = *in++;
309+
len--;
310+
}
311+
out = mb_convert_buf_add2(out, uuencode_six_bits((w2 << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));
312+
}
313+
n_cached_bits = cached_bits = 0;
314+
goto possible_line_break;
315+
}
264316
}
265317

266318
while (len--) {
267319
uint32_t w = *in++;
268320
uint32_t w2 = 0, w3 = 0;
269321

270-
if (len) {
322+
if (!len) {
323+
if (!end) {
324+
out = mb_convert_buf_add(out, uuencode_six_bits((w >> 2) & 0x3F));
325+
/* Cache 2 remaining bits from 'w' */
326+
cached_bits = w & 0x3;
327+
n_cached_bits = 2;
328+
break;
329+
}
330+
} else {
271331
w2 = *in++;
272332
len--;
273333
}
274-
if (len) {
334+
335+
if (!len) {
336+
if (!end) {
337+
out = mb_convert_buf_add2(out, uuencode_six_bits((w >> 2) & 0x3F), uuencode_six_bits(((w & 0x3) << 4) + ((w2 >> 4) & 0xF)));
338+
/* Cache 4 remaining bits from 'w2' */
339+
cached_bits = w2 & 0xF;
340+
n_cached_bits = 4;
341+
break;
342+
}
343+
} else {
275344
w3 = *in++;
276345
len--;
277346
}
278347

279348
out = mb_convert_buf_add4(out, uuencode_six_bits((w >> 2) & 0x3F), uuencode_six_bits(((w & 0x3) << 4) + ((w2 >> 4) & 0xF)), uuencode_six_bits(((w2 & 0xF) << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));
280349

350+
possible_line_break:
281351
bytes_encoded += 3;
282352

283353
if (bytes_encoded >= 45) {
284354
out = mb_convert_buf_add(out, '\n');
285-
if (len) {
355+
if (len || !end) {
286356
out = mb_convert_buf_add(out, MIN(len, 45) + 32);
287357
}
288358
bytes_encoded = 0;
289359
}
290360
}
291361

292-
if (bytes_encoded) {
362+
if (bytes_encoded && end) {
293363
out = mb_convert_buf_add(out, '\n');
294364
}
295365

296-
buf->state = (bytes_encoded << 1) | (buf->state & 1);
366+
buf->state = ((cached_bits & 0xFF) << 16) | ((n_cached_bits & 0xFF) << 8) | ((bytes_encoded & 0x7F) << 1) | (buf->state & 1);
297367
MB_CONVERT_BUF_STORE(buf, out, limit);
298368
}

‎ext/mbstring/tests/uuencode_encoding.phpt

+19-9
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@ Temporary test of mbstring's UUEncode 'encoding'
44
mbstring
55
--FILE--
66
<?php
7+
srand(1000); // Make results consistent
78

89
/* Using mbstring to convert strings from UUEncode has already been deprecated
910
* So this test should be removed when the UUEncode 'encoding' is */
11+
error_reporting(E_ALL & ~E_DEPRECATED);
1012

1113
function testConversion($uuencode, $raw) {
1214
$converted = mb_convert_encoding($uuencode, '8bit', 'UUENCODE');
@@ -26,16 +28,24 @@ testConversion("begin 0644 filename\n::'1T<#HO+W=W=RYW:6MI<&5D:6\$N;W)G#0H`\n",
2628
testConversion("begin 0644 filename\n#`0(#\n", "\x01\x02\x03");
2729
testConversion("begin 0644 filename\n$`0(#\"@``\n", "\x01\x02\x03\n");
2830

31+
function testRoundTrip($data) {
32+
$encoded = mb_convert_encoding($data, 'UUENCODE', '8bit');
33+
$decoded = mb_convert_encoding($encoded, '8bit', 'UUENCODE');
34+
if ($data !== $decoded)
35+
die("Round-trip failed! Expected " . bin2hex($data) . " to round-trip; actually got " . bin2hex($decoded));
36+
}
37+
38+
for ($iterations = 0; $iterations < 500; $iterations++) {
39+
$strlen = rand(1, 300);
40+
$characters = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
41+
$randstring = '';
42+
for ($i = 0; $i < $strlen; $i++) {
43+
$randstring .= $characters[rand(0, strlen($characters) - 1)];
44+
}
45+
testRoundTrip($randstring);
46+
}
47+
2948
echo "Done!\n";
3049
?>
3150
--EXPECTF--
32-
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
33-
34-
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
35-
36-
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
37-
38-
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
39-
40-
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
4151
Done!

0 commit comments

Comments
 (0)