Skip to content

Commit 6938e35

Browse files
committed
Fix legacy conversion filter for CP50220
1 parent 1662f7f commit 6938e35

File tree

2 files changed

+11
-0
lines changed

2 files changed

+11
-0
lines changed

ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c

+4
Original file line number | Diff line number | Diff line change
@@ -338,7 +338,11 @@ static int mbfl_filt_conv_wchar_cp50220(int c, mbfl_convert_filter *filter)
338338
if (filter->cache) {
339339
int s = mbfl_convert_kana(filter->cache, c, &consumed, NULL, mode);
340340
filter->cache = consumed ? 0 : c;
341+
/* Terrible hack to get CP50220 to emit error markers in the proper
342+
* position, not reordering them with subsequent characters */
343+
filter->filter_function = mbfl_filt_conv_wchar_cp50221;
341344
mbfl_filt_conv_wchar_cp50221(s, filter);
345+
filter->filter_function = mbfl_filt_conv_wchar_cp50220;
342346
} else if (c == 0) {
343347
/* This case has to be handled separately, since `filter->cache == 0` means
344348
* no codepoint is cached */

ext/mbstring/tests/cp5022x_encoding.phpt

+7
Original file line number | Diff line number | Diff line change
@@ -375,6 +375,13 @@ $converted = mb_convert_encoding("ab\x00", 'UTF-16BE', 'CP50220');
375375
if ($converted !== "\x00a\x00b\x00\x00")
376376
die("Bad handling of trailing null byte (got " . bin2hex($converted) . ")");
377377

378+
// Previously, the CP50220 implementation would reorder error markers with
379+
// subsequent characters
380+
mb_substitute_character(0x3F);
381+
$converted = mb_convert_encoding("\xff\xff\x00&", 'CP50220', 'UTF-16BE');
382+
if ($converted !== '?&')
383+
die("Bad handling of erroneous codepoint followed by good one (got " . bin2hex($converted) . ")");
384+
378385
?>
379386
--EXPECT--
380387
ASCII support OK

0 commit comments

Comments
 (0)