Skip to content

Commit a9a6720

Browse files
committed Jan 3, 2023
Implement mb_output_handler using fast text conversion filters
1 parent 2a8cecd commit a9a6720

File tree

2 files changed

+49
-78
lines changed

2 files changed

+49
-78
lines changed
 

‎ext/mbstring/mbstring.c

+47 -77
Original file line number | Diff line number | Diff line change
@@ -1023,7 +1023,8 @@ ZEND_TSRMLS_CACHE_UPDATE();
10231023
mbstring_globals->illegalchars = 0;
10241024
mbstring_globals->encoding_translation = 0;
10251025
mbstring_globals->strict_detection = 0;
1026-
mbstring_globals->outconv = NULL;
1026+
mbstring_globals->outconv_enabled = false;
1027+
mbstring_globals->outconv_state = 0;
10271028
mbstring_globals->http_output_conv_mimetypes = NULL;
10281029
#ifdef HAVE_MBREGEX
10291030
mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
@@ -1144,11 +1145,6 @@ PHP_RSHUTDOWN_FUNCTION(mbstring)
11441145
MBSTRG(current_detect_order_list) = NULL;
11451146
MBSTRG(current_detect_order_list_size) = 0;
11461147
}
1147-
if (MBSTRG(outconv) != NULL) {
1148-
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1149-
mbfl_buffer_converter_delete(MBSTRG(outconv));
1150-
MBSTRG(outconv) = NULL;
1151-
}
11521148

11531149
/* clear http input identification. */
11541150
MBSTRG(http_input_identify) = NULL;
@@ -1166,6 +1162,9 @@ PHP_RSHUTDOWN_FUNCTION(mbstring)
11661162
MBSTRG(http_output_set) = 0;
11671163
MBSTRG(http_input_set) = 0;
11681164

1165+
MBSTRG(outconv_enabled) = false;
1166+
MBSTRG(outconv_state) = 0;
1167+
11691168
#ifdef HAVE_MBREGEX
11701169
PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
11711170
#endif
@@ -1548,112 +1547,83 @@ PHP_FUNCTION(mb_parse_str)
15481547
}
15491548
/* }}} */
15501549

1551-
/* {{{ Returns string in output buffer converted to the http_output encoding */
15521550
PHP_FUNCTION(mb_output_handler)
15531551
{
1554-
char *arg_string;
1555-
size_t arg_string_len;
1552+
zend_string *str;
15561553
zend_long arg_status;
1557-
mbfl_string string, result;
1558-
const char *charset;
1559-
char *p;
1560-
const mbfl_encoding *encoding;
1561-
int last_feed;
1562-
size_t len;
1563-
unsigned char send_text_mimetype = 0;
1564-
char *s, *mimetype = NULL;
15651554

15661555
ZEND_PARSE_PARAMETERS_START(2, 2)
1567-
Z_PARAM_STRING(arg_string, arg_string_len)
1556+
Z_PARAM_STR(str)
15681557
Z_PARAM_LONG(arg_status)
15691558
ZEND_PARSE_PARAMETERS_END();
15701559

1571-
encoding = MBSTRG(current_http_output_encoding);
1572-
1573-
/* start phase only */
1574-
if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
1575-
/* delete the converter just in case. */
1576-
if (MBSTRG(outconv)) {
1577-
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1578-
mbfl_buffer_converter_delete(MBSTRG(outconv));
1579-
MBSTRG(outconv) = NULL;
1580-
}
1560+
const mbfl_encoding *encoding = MBSTRG(current_http_output_encoding);
1561+
if (encoding == &mbfl_encoding_pass) {
1562+
RETURN_STR(zend_string_copy(str));
1563+
}
15811564

1582-
if (encoding == &mbfl_encoding_pass) {
1583-
RETURN_STRINGL(arg_string, arg_string_len);
1584-
}
1565+
if (arg_status & PHP_OUTPUT_HANDLER_START) {
1566+
bool free_mimetype = false;
1567+
char *mimetype = NULL;
15851568

1586-
/* analyze mime type */
1587-
if (SG(sapi_headers).mimetype &&
1588-
_php_mb_match_regex(
1589-
MBSTRG(http_output_conv_mimetypes),
1590-
SG(sapi_headers).mimetype,
1591-
strlen(SG(sapi_headers).mimetype))) {
1592-
if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL) {
1569+
/* Analyze mime type */
1570+
if (SG(sapi_headers).mimetype && _php_mb_match_regex(MBSTRG(http_output_conv_mimetypes), SG(sapi_headers).mimetype, strlen(SG(sapi_headers).mimetype))) {
1571+
char *s;
1572+
if ((s = strchr(SG(sapi_headers).mimetype, ';')) == NULL) {
15931573
mimetype = estrdup(SG(sapi_headers).mimetype);
15941574
} else {
1595-
mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
1575+
mimetype = estrndup(SG(sapi_headers).mimetype, s - SG(sapi_headers).mimetype);
15961576
}
1597-
send_text_mimetype = 1;
1577+
free_mimetype = true;
15981578
} else if (SG(sapi_headers).send_default_content_type) {
15991579
mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
16001580
}
16011581

1602-
/* if content-type is not yet set, set it and activate the converter */
1603-
if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
1604-
charset = encoding->mime_name;
1582+
/* If content-type is not yet set, set it and enable conversion */
1583+
if (SG(sapi_headers).send_default_content_type || free_mimetype) {
1584+
const char *charset = encoding->mime_name;
16051585
if (charset) {
1606-
len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
1586+
char *p;
1587+
size_t len = spprintf(&p, 0, "Content-Type: %s; charset=%s", mimetype, charset);
16071588
if (sapi_add_header(p, len, 0) != FAILURE) {
16081589
SG(sapi_headers).send_default_content_type = 0;
16091590
}
16101591
}
1611-
/* activate the converter */
1612-
MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
1613-
if (send_text_mimetype){
1614-
efree(mimetype);
1615-
}
1592+
1593+
MBSTRG(outconv_enabled) = true;
1594+
}
1595+
1596+
if (free_mimetype) {
1597+
efree(mimetype);
16161598
}
16171599
}
16181600

1619-
/* just return if the converter is not activated. */
1620-
if (MBSTRG(outconv) == NULL) {
1621-
RETURN_STRINGL(arg_string, arg_string_len);
1601+
if (!MBSTRG(outconv_enabled)) {
1602+
RETURN_STR(zend_string_copy(str));
16221603
}
16231604

1624-
/* flag */
1625-
last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
1626-
/* mode */
1627-
mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
1628-
mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
1605+
mb_convert_buf buf;
1606+
mb_convert_buf_init(&buf, ZSTR_LEN(str), MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode));
16291607

1630-
/* feed the string */
1631-
mbfl_string_init(&string);
1632-
/* these are not needed. convd has encoding info.
1633-
string.encoding = MBSTRG(current_internal_encoding);
1634-
*/
1635-
string.val = (unsigned char *)arg_string;
1636-
string.len = arg_string_len;
1608+
uint32_t wchar_buf[128];
1609+
unsigned char *in = (unsigned char*)ZSTR_VAL(str);
1610+
size_t in_len = ZSTR_LEN(str);
1611+
bool last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
16371612

1638-
mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
1639-
if (last_feed) {
1640-
mbfl_buffer_converter_flush(MBSTRG(outconv));
1613+
while (in_len) {
1614+
size_t out_len = MBSTRG(current_internal_encoding)->to_wchar(&in, &in_len, wchar_buf, 128, &MBSTRG(outconv_state));
1615+
ZEND_ASSERT(out_len <= 128);
1616+
encoding->from_wchar(wchar_buf, out_len, &buf, !in_len && last_feed);
16411617
}
1642-
/* get the converter output, and return it */
1643-
mbfl_buffer_converter_result(MBSTRG(outconv), &result);
16441618

1645-
// TODO: avoid reallocation ???
1646-
RETVAL_STRINGL((char *)result.val, result.len); /* the string is already strdup()'ed */
1647-
efree(result.val);
1619+
MBSTRG(illegalchars) += buf.errors;
1620+
RETVAL_STR(mb_convert_buf_result(&buf));
16481621

1649-
/* delete the converter if it is the last feed. */
16501622
if (last_feed) {
1651-
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1652-
mbfl_buffer_converter_delete(MBSTRG(outconv));
1653-
MBSTRG(outconv) = NULL;
1623+
MBSTRG(outconv_enabled) = false;
1624+
MBSTRG(outconv_state) = 0;
16541625
}
16551626
}
1656-
/* }}} */
16571627

16581628
/* {{{ Convert a multibyte string to an array. If split_length is specified,
16591629
break the string down into chunks each split_length characters long. */

‎ext/mbstring/mbstring.h

+2 -1
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,8 @@ ZEND_BEGIN_MODULE_GLOBALS(mbstring)
9494
bool encoding_translation;
9595
bool strict_detection;
9696
size_t illegalchars;
97-
mbfl_buffer_converter *outconv;
97+
bool outconv_enabled;
98+
unsigned int outconv_state;
9899
void *http_output_conv_mimetypes;
99100
#ifdef HAVE_MBREGEX
100101
struct _zend_mb_regex_globals *mb_regex_globals;

0 commit comments

Comments
 (0)