Skip to content

Commit d9269be

Browse files
committed
Fix problems with ISO-2022-KR conversion
• The legacy conversion code did not emit an error marker if an escape sequence was truncated.
• Both the old and the new conversion code would shift from KSC5601 (KS X 1001) mode to ASCII mode on an invalid escape sequence. This makes no sense: an invalid escape sequence should be reported as an error, not treated as a shift back to ASCII.
1 parent bfccdbd commit d9269be

File tree

2 files changed

+9
-3
lines changed

2 files changed

+9
-3
lines changed

ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c

+6-3
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,7 @@ int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter)
145145
if (c == '$') {
146146
filter->status++;
147147
} else {
148-
filter->status = 0;
148+
filter->status &= ~0xF;
149149
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
150150
}
151151
break;
@@ -154,7 +154,7 @@ int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter)
154154
if (c == ')') {
155155
filter->status++;
156156
} else {
157-
filter->status = 0;
157+
filter->status &= ~0xF;
158158
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
159159
}
160160
break;
@@ -258,6 +258,10 @@ int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter)
258258

259259
static int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter)
260260
{
261+
if (filter->status & 0xF) {
262+
/* Escape sequence or 2-byte character was truncated */
263+
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
264+
}
261265
/* back to ascii */
262266
if (filter->status & 0x10) {
263267
CK((*filter->output_function)(0x0f, filter->data)); /* shift in */
@@ -305,7 +309,6 @@ static size_t mb_iso2022kr_to_wchar(unsigned char **in, size_t *in_len, uint32_t
305309
p--;
306310
}
307311
*out++ = MBFL_BAD_INPUT;
308-
*state = ASCII;
309312
}
310313
} else if (c == 0xF) {
311314
*state = ASCII;

ext/mbstring/tests/iso2022kr_encoding.phpt

+3
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,9 @@ convertInvalidString("\xFF\x86", "\x1B\$)C%", "UTF-16BE", "ISO-2022-KR");
114114
// character at the end of a string, although the string was already ending in ASCII mode
115115
convertValidString("\x68\x46\x00a", "\x1B\$)C\x0E\x68\x46\x0Fa", "UTF-16BE", "ISO-2022-KR", false);
116116

117+
// Regression test: Don't shift from KS X 1001 to ASCII mode on invalid escape sequence
118+
convertInvalidString("\x0E\x1BX\x74\x30", "\x00%\x76\x20", "ISO-2022-KR", "UTF-16BE", false);
119+
117120
// Test "long" illegal character markers
118121
mb_substitute_character("long");
119122
convertInvalidString("\x1B", "%", "ISO-2022-KR", "UTF-8");

0 commit comments

Comments
 (0)