Skip to content

Commit 7559bf7

Browse files
committed
Fix new conversion filters for mobile SJIS variants ('0' at end of buffer)
Previously, I had adjusted this code so that if a character which could be part of a special Docomo/Softbank/KDDI 'keypad' emoji appeared at the end of one buffer, and the 'keypad' character (U+20E3) appeared at the beginning of the next, the two would still be combined. However, that change broke the case where such a character appears at the end of one buffer and a character which is NOT the 'keypad' character appears at the beginning of the next. This was found while fuzzing the new implementation of mb_decode_numericentity.
1 parent fa83a8e commit 7559bf7

File tree

2 files changed

+25
-21
lines changed

2 files changed

+25
-21
lines changed

ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c

+11-21
Original file line numberDiff line numberDiff line change
@@ -929,7 +929,7 @@ static void mb_wchar_to_sjis_docomo(uint32_t *in, size_t len, mb_convert_buf *bu
929929
{
930930
unsigned char *out, *limit;
931931
MB_CONVERT_BUF_LOAD(buf, out, limit);
932-
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
932+
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0));
933933

934934
uint32_t w;
935935
unsigned int s = 0;
@@ -939,14 +939,15 @@ static void mb_wchar_to_sjis_docomo(uint32_t *in, size_t len, mb_convert_buf *bu
939939
w = buf->state;
940940
buf->state = 0;
941941
if (len) {
942-
goto process_possible_keypad;
942+
goto reprocess_wchar;
943943
} else {
944944
goto emit_output;
945945
}
946946
}
947947

948948
while (len--) {
949949
w = *in++;
950+
reprocess_wchar:
950951
s = 0;
951952

952953
if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
@@ -1018,7 +1019,6 @@ static void mb_wchar_to_sjis_docomo(uint32_t *in, size_t len, mb_convert_buf *bu
10181019
break;
10191020
}
10201021
}
1021-
process_possible_keypad: ;
10221022
uint32_t w2 = *in++; len--;
10231023
if (w2 == 0x20E3) {
10241024
if (w == '#') {
@@ -1160,7 +1160,7 @@ static void mb_wchar_to_sjis_kddi(uint32_t *in, size_t len, mb_convert_buf *buf,
11601160
{
11611161
unsigned char *out, *limit;
11621162
MB_CONVERT_BUF_LOAD(buf, out, limit);
1163-
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
1163+
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0));
11641164

11651165
uint32_t w;
11661166
unsigned int s = 0;
@@ -1169,18 +1169,15 @@ static void mb_wchar_to_sjis_kddi(uint32_t *in, size_t len, mb_convert_buf *buf,
11691169
w = buf->state;
11701170
buf->state = 0;
11711171
if (len) {
1172-
if (w >= NFLAGS('A')) {
1173-
goto process_possible_flag;
1174-
} else {
1175-
goto process_possible_keypad;
1176-
}
1172+
goto reprocess_wchar;
11771173
} else {
11781174
goto emit_output;
11791175
}
11801176
}
11811177

11821178
while (len--) {
11831179
w = *in++;
1180+
reprocess_wchar:
11841181
s = 0;
11851182

11861183
if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
@@ -1246,7 +1243,6 @@ static void mb_wchar_to_sjis_kddi(uint32_t *in, size_t len, mb_convert_buf *buf,
12461243
break;
12471244
}
12481245
}
1249-
process_possible_keypad: ;
12501246
uint32_t w2 = *in++; len--;
12511247
if (w2 == 0x20E3) {
12521248
if (w == '#') {
@@ -1271,7 +1267,6 @@ process_possible_keypad: ;
12711267
}
12721268
break;
12731269
}
1274-
process_possible_flag: ;
12751270
uint32_t w2 = *in++; len--;
12761271
if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */
12771272
for (int i = 0; i < 10; i++) {
@@ -1472,7 +1467,7 @@ static void mb_wchar_to_sjis_sb(uint32_t *in, size_t len, mb_convert_buf *buf, b
14721467
{
14731468
unsigned char *out, *limit;
14741469
MB_CONVERT_BUF_LOAD(buf, out, limit);
1475-
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
1470+
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0));
14761471

14771472
uint32_t w;
14781473
unsigned int s = 0;
@@ -1481,18 +1476,15 @@ static void mb_wchar_to_sjis_sb(uint32_t *in, size_t len, mb_convert_buf *buf, b
14811476
w = buf->state;
14821477
buf->state = 0;
14831478
if (len) {
1484-
if (w >= NFLAGS('A')) {
1485-
goto process_possible_flag;
1486-
} else {
1487-
goto process_possible_keypad;
1488-
}
1479+
goto reprocess_wchar;
14891480
} else {
14901481
goto emit_output;
14911482
}
14921483
}
14931484

14941485
while (len--) {
14951486
w = *in++;
1487+
reprocess_wchar:
14961488
s = 0;
14971489

14981490
if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
@@ -1558,7 +1550,6 @@ static void mb_wchar_to_sjis_sb(uint32_t *in, size_t len, mb_convert_buf *buf, b
15581550
break;
15591551
}
15601552
}
1561-
process_possible_keypad: ;
15621553
uint32_t w2 = *in++; len--;
15631554
if (w2 == 0x20E3) {
15641555
if (w == '#') {
@@ -1577,13 +1568,12 @@ process_possible_keypad: ;
15771568
if (end) {
15781569
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb);
15791570
} else {
1580-
/* Reprocess `w` when this function is called again with another buffer
1581-
* of wchars */
1571+
/* Reprocess `w` when this function is called again with
1572+
* another buffer of wchars */
15821573
buf->state = w;
15831574
}
15841575
break;
15851576
}
1586-
process_possible_flag: ;
15871577
uint32_t w2 = *in++; len--;
15881578
if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */
15891579
for (int i = 0; i < 10; i++) {

ext/mbstring/tests/sjis_mobile_encodings.phpt

+14
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,20 @@ for ($i = 0; $i <= 256; $i++) {
324324
convertValidString(str_repeat("\x00a", $i) . "\x00\x30\x20\xE3", str_repeat('a', $i) . "\xF7\xC5", 'UTF-16BE', 'SJIS-Mobile#SOFTBANK');
325325
}
326326

327+
// Regression test for 0-9 appearing at end of one buffer and U+20E3 NOT appearing
328+
// at the beginning of the next
329+
for ($i = 0; $i <= 256; $i++) {
330+
convertValidString(str_repeat("\x000", $i), str_repeat('0', $i), 'UTF-16BE', 'SJIS-Mobile#DOCOMO');
331+
convertValidString(str_repeat("\x000", $i), str_repeat('0', $i), 'UTF-16BE', 'SJIS-Mobile#KDDI');
332+
convertValidString(str_repeat("\x000", $i), str_repeat('0', $i), 'UTF-16BE', 'SJIS-Mobile#SOFTBANK');
333+
}
334+
335+
// Regression test for not making enough space in output buffer when 0-9 appeared
336+
// at the end of one buffer and was re-processed together with the next.
337+
// This crazy-looking string was found by a fuzzer
338+
$str = "\x04\xff\x930\x00\xffUTF7~'F\x00A\x00\xffA\x0018030@\x00[\x1b\$EEEEE\x5C\x80(8~\x00F\x00zgb-18030$\x008~\x00F\x00z-gb-18EUC_JP-2004\x00z-g0\x0018030\x00b-18030$\x008~\x00F\x00z-gb-18EUC_JP-2004\x00z-g0\x0018030\x00";
339+
mb_convert_encoding($str, 'SJIS-Mobile#SOFTBANK', 'SJIS-Mobile#SOFTBANK');
340+
327341
?>
328342
--EXPECT--
329343
SJIS-Mobile#DOCOMO verification and conversion works on all valid characters

0 commit comments

Comments
 (0)