Skip to content

Commit 4299e2d

Browse files
committed
JIS7/JIS8 encoding: treat truncated multibyte characters as error
1 parent b67e358 commit 4299e2d

File tree

2 files changed

+24
-2
lines changed

2 files changed

+24
-2
lines changed

ext/mbstring/libmbfl/filters/mbfilter_jis.c

+13-2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
#include "unicode_table_cp932_ext.h"
3434
#include "unicode_table_jis.h"
3535

36+
static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter);
37+
3638
const mbfl_encoding mbfl_encoding_jis = {
3739
mbfl_no_encoding_jis,
3840
"JIS",
@@ -61,7 +63,7 @@ const struct mbfl_convert_vtbl vtbl_jis_wchar = {
6163
mbfl_filt_conv_common_ctor,
6264
NULL,
6365
mbfl_filt_conv_jis_wchar,
64-
mbfl_filt_conv_common_flush,
66+
mbfl_filt_conv_jis_wchar_flush,
6567
NULL,
6668
};
6769

@@ -81,7 +83,7 @@ const struct mbfl_convert_vtbl vtbl_2022jp_wchar = {
8183
mbfl_filt_conv_common_ctor,
8284
NULL,
8385
mbfl_filt_conv_jis_wchar,
84-
mbfl_filt_conv_common_flush,
86+
mbfl_filt_conv_jis_wchar_flush,
8587
NULL,
8688
};
8789

@@ -264,6 +266,15 @@ mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter)
264266
return c;
265267
}
266268

269+
/*
 * Flush handler for the JIS and ISO-2022-JP => wchar conversion filters
 * (installed in vtbl_jis_wchar and vtbl_2022jp_wchar).
 *
 * If the low nibble of filter->status is 1, input ended after the first byte
 * of a 2-byte character. The byte held in filter->cache is then passed to the
 * output function ORed with MBFL_WCSGROUP_THROUGH, so the truncation is
 * reported instead of being silently dropped.
 *
 * Afterwards the flush is forwarded to the next stage through
 * filter->flush_function, when one is set. This handler takes the place of
 * mbfl_filt_conv_common_flush in the vtbl, so it has to keep the flush
 * propagating down the filter chain itself.
 * NOTE(review): assumes the common flush forwarded via flush_function and
 * ignored its result - confirm against mbfl_convert.c.
 *
 * Returns 0. CK() wraps the output call; presumably it returns early with an
 * error when the output function fails (macro is defined outside this view).
 */
static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter)
{
	if ((filter->status & 0xF) == 1) {
		/* 2-byte (JIS X 0208 or 0212) character was truncated */
		CK((*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data));
		/* The pending byte has been reported; clear the "awaiting 2nd byte"
		 * state so that a repeated flush does not report it a second time.
		 * The upper status bits are left untouched. */
		filter->status &= ~0xF;
	}

	/* Keep the flush moving to the next filter in the chain */
	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
277+
267278
/*
268279
* wchar => JIS
269280
*/

ext/mbstring/tests/iso2022jp_encoding.phpt

+11
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,12 @@ for ($i = 0x21; $i <= 0x7E; $i++) {
125125
}
126126
}
127127

128+
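/* Truncated JIS X 0208 characters: an escape to JIS X 0208 followed by only the first byte of a 2-byte character must be reported as invalid */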
129+
for ($i = 0x21; $i <= 0x7E; $i++) {
130+
testInvalid("\x1B\$B" . chr($i), "\x00%", 'JIS');
131+
testInvalid("\x1B\$B" . chr($i), "\x00%", 'ISO-2022-JP');
132+
}
133+
128134
echo "JIS X 0208 support OK\n";
129135

130136
/* JIS7 supports escape to switch to JIS X 0212 charset, but ISO-2022-JP does not */
@@ -144,6 +150,11 @@ for ($i = 0x21; $i <= 0x7E; $i++) {
144150
}
145151
}
146152

153+
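/* Truncated JIS X 0212 characters: an escape to JIS X 0212 followed by only the first byte of a 2-byte character must be reported as invalid (JIS only; ISO-2022-JP has no JIS X 0212 escape) */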
154+
for ($i = 0x21; $i <= 0x7E; $i++) {
155+
testInvalid("\x1B\$(D" . chr($i), "\x00%", 'JIS');
156+
}
157+
147158
echo "JIS X 0212 support OK\n";
148159

149160
/* All possible escape sequences */

0 commit comments

Comments
 (0)