Skip to content

Commit 983a29d

Browse files
committed
Legacy conversion code for '7bit' to '8bit' inserts error markers
The use of a special 'vtbl' for converting between '7bit' and '8bit' text meant that '7bit' text was not converted to wchars before being converted to '8bit'. As a result, the special value MBFL_BAD_INPUT, which we use to flag an erroneous byte sequence in input text (and which is required by functions like mb_check_encoding), would pass directly to the output instead of being converted to the error marker specified by mb_substitute_character. This issue dates back to the time when I removed the mbfl 'identify filters' and made encoding validity checking and encoding detection rely only on the conversion filters. Fix it by removing the special '7bit' <-> '8bit' vtbls and giving '7bit' ordinary vtbl_7bit_wchar / vtbl_wchar_7bit conversion filters, so that '7bit' text is converted via wchars and erroneous bytes are replaced by the configured error marker.
1 parent f3c8efd commit 983a29d

File tree

3 files changed

+20
-26
lines changed

3 files changed

+20
-26
lines changed

ext/mbstring/libmbfl/filters/mbfilter_7bit.c

+15-17
Original file line numberDiff line numberDiff line change
@@ -34,21 +34,18 @@
3434
static size_t mb_7bit_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
3535
static void mb_wchar_to_7bit(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
3636

37-
const mbfl_encoding mbfl_encoding_7bit = {
37+
const struct mbfl_convert_vtbl vtbl_7bit_wchar = {
3838
mbfl_no_encoding_7bit,
39-
"7bit",
40-
"7bit",
41-
NULL,
42-
NULL,
43-
MBFL_ENCTYPE_SBCS,
39+
mbfl_no_encoding_wchar,
40+
mbfl_filt_conv_common_ctor,
4441
NULL,
42+
mbfl_filt_conv_7bit_any,
43+
mbfl_filt_conv_common_flush,
4544
NULL,
46-
mb_7bit_to_wchar,
47-
mb_wchar_to_7bit
4845
};
4946

50-
const struct mbfl_convert_vtbl vtbl_8bit_7bit = {
51-
mbfl_no_encoding_8bit,
47+
const struct mbfl_convert_vtbl vtbl_wchar_7bit = {
48+
mbfl_no_encoding_wchar,
5249
mbfl_no_encoding_7bit,
5350
mbfl_filt_conv_common_ctor,
5451
NULL,
@@ -57,25 +54,26 @@ const struct mbfl_convert_vtbl vtbl_8bit_7bit = {
5754
NULL,
5855
};
5956

60-
const struct mbfl_convert_vtbl vtbl_7bit_8bit = {
57+
const mbfl_encoding mbfl_encoding_7bit = {
6158
mbfl_no_encoding_7bit,
62-
mbfl_no_encoding_8bit,
63-
mbfl_filt_conv_common_ctor,
59+
"7bit",
60+
"7bit",
6461
NULL,
65-
mbfl_filt_conv_7bit_any,
66-
mbfl_filt_conv_common_flush,
6762
NULL,
63+
MBFL_ENCTYPE_SBCS,
64+
&vtbl_7bit_wchar,
65+
&vtbl_wchar_7bit,
66+
mb_7bit_to_wchar,
67+
mb_wchar_to_7bit
6868
};
6969

70-
7170
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
7271

7372
int mbfl_filt_conv_7bit_any(int c, mbfl_convert_filter *filter)
7473
{
7574
return (*filter->output_function)(c < 0x80 ? c : MBFL_BAD_INPUT, filter->data);
7675
}
7776

78-
7977
int mbfl_filt_conv_any_7bit(int c, mbfl_convert_filter *filter)
8078
{
8179
if (c >= 0 && c < 0x80) {

ext/mbstring/libmbfl/mbfl/mbfl_convert.c

+4-8
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,6 @@ static const struct mbfl_convert_vtbl *mbfl_special_filter_list[] = {
8585
&vtbl_uuencode_8bit,
8686
&vtbl_8bit_qprint,
8787
&vtbl_qprint_8bit,
88-
&vtbl_8bit_7bit,
89-
&vtbl_7bit_8bit,
9088
&vtbl_pass,
9189
NULL
9290
};
@@ -302,13 +300,11 @@ int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
302300
const struct mbfl_convert_vtbl* mbfl_convert_filter_get_vtbl(const mbfl_encoding *from, const mbfl_encoding *to)
303301
{
304302
if (to->no_encoding == mbfl_no_encoding_base64 ||
305-
to->no_encoding == mbfl_no_encoding_qprint ||
306-
to->no_encoding == mbfl_no_encoding_7bit) {
303+
to->no_encoding == mbfl_no_encoding_qprint) {
307304
from = &mbfl_encoding_8bit;
308305
} else if (from->no_encoding == mbfl_no_encoding_base64 ||
309306
from->no_encoding == mbfl_no_encoding_qprint ||
310-
from->no_encoding == mbfl_no_encoding_uuencode ||
311-
from->no_encoding == mbfl_no_encoding_7bit) {
307+
from->no_encoding == mbfl_no_encoding_uuencode) {
312308
to = &mbfl_encoding_8bit;
313309
}
314310

@@ -353,9 +349,9 @@ zend_string* mb_fast_convert(unsigned char *in, size_t in_len, const mbfl_encodi
353349
uint32_t wchar_buf[128];
354350
unsigned int state = 0;
355351

356-
if (to == &mbfl_encoding_base64 || to == &mbfl_encoding_qprint || to == &mbfl_encoding_7bit) {
352+
if (to == &mbfl_encoding_base64 || to == &mbfl_encoding_qprint) {
357353
from = &mbfl_encoding_8bit;
358-
} else if (from == &mbfl_encoding_base64 || from == &mbfl_encoding_qprint || from == &mbfl_encoding_uuencode || from == &mbfl_encoding_7bit) {
354+
} else if (from == &mbfl_encoding_base64 || from == &mbfl_encoding_qprint || from == &mbfl_encoding_uuencode) {
359355
to = &mbfl_encoding_8bit;
360356
}
361357

ext/mbstring/tests/other_encodings.phpt

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ string(3) "ABC"
2828
string(1) "%"
2929
string(3) "ABC"
3030
bool(false)
31-
string(2) "%%"
31+
string(1) "%"
3232
7bit done
3333
string(1) "%"
3434
8bit done

0 commit comments

Comments
 (0)