Skip to content

Commit fa83a8e

Browse files
committed Jul 18, 2022
Fix new conversion filter for HTML entities
While fuzzing the new mb_decode_numericentity implementation, I discovered that the fast conversion filter for 'HTML-ENTITIES' did not correctly handle an empty named entity ('&;'), nor did it correctly handle invalid named entities whose names were a prefix of a valid entity. Also, it did not correctly handle the case where a named entity is truncated and another named entity starts abruptly.
1 parent 9c3972f commit fa83a8e

File tree

2 files changed

+16
-5
lines changed

2 files changed

+16
-5
lines changed
 

‎ext/mbstring/libmbfl/filters/mbfilter_htmlent.c

+10-5
Original file line number | Diff line number | Diff line change
@@ -334,6 +334,11 @@ void mbfl_filt_conv_html_dec_copy(mbfl_convert_filter *src, mbfl_convert_filter
334334
memcpy(dest->opaque, src->opaque, html_enc_buffer_size+1);
335335
}
336336

337+
/*
 * Report whether byte `c` is allowed inside the body of an HTML entity,
 * i.e. between the leading '&' and the terminating ';'.
 *
 * Accepted bytes are the ASCII digits '0'-'9', the ASCII letters 'A'-'Z'
 * and 'a'-'z', and '#' (the marker that introduces a numeric entity).
 * Every other byte, including ';', '&', whitespace and any byte >= 0x80,
 * yields false.
 *
 * The ranges are spelled out explicitly instead of using <ctype.h>
 * classifiers such as isalnum(), whose results may vary with the current
 * locale; this predicate must classify bytes identically everywhere.
 */
static bool is_html_entity_char(unsigned char c)
{
	return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '#';
}
341+
337342
static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
338343
{
339344
unsigned char *p = *in, *e = p + *in_len;
@@ -345,9 +350,9 @@ static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
345350
if (c == '&') {
346351
/* Find terminating ; for HTML entity */
347352
unsigned char *terminator = p;
348-
while (terminator < e && *terminator != ';')
353+
while (terminator < e && is_html_entity_char(*terminator))
349354
terminator++;
350-
if (terminator < e) {
355+
if (terminator < e && *terminator == ';') {
351356
if (*p == '#' && (e - p) >= 2) {
352357
/* Numeric entity */
353358
unsigned int value = 0;
@@ -390,11 +395,11 @@ static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
390395
*out++ = value;
391396
p = terminator + 1;
392397
goto next_iteration;
393-
} else {
398+
} else if (terminator > p && terminator < e) {
394399
/* Named entity */
395400
mbfl_html_entity_entry *entity = (mbfl_html_entity_entry*)mbfl_html_entity_list;
396401
while (entity->name) {
397-
if (!strncmp((char*)p, entity->name, terminator - p)) {
402+
if (!strncmp((char*)p, entity->name, terminator - p) && strlen(entity->name) == terminator - p) {
398403
*out++ = entity->code;
399404
p = terminator + 1;
400405
goto next_iteration;
@@ -409,7 +414,7 @@ static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
409414
while (p < terminator && out < limit) {
410415
*out++ = *p++;
411416
}
412-
if (terminator < e && out < limit) {
417+
if (terminator < e && *terminator == ';' && out < limit) {
413418
*out++ = *p++;
414419
}
415420
} else {

‎ext/mbstring/tests/htmlent_encoding.phpt

+6
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,12 @@ convertFromEntities("\x00", '&#00000;');
4545

4646
testConversion(str_repeat('あ', 100), str_repeat('&#12354;', 100));
4747

48+
convertFromEntities("&;", "&;");
49+
convertFromEntities("&f;", "&f;");
50+
51+
convertFromEntities("&A", "&&#65;");
52+
convertFromEntities("&A", "&&#x41;");
53+
4854
echo "Done!\n";
4955
?>
5056
--EXPECTF--

0 commit comments

Comments
 (0)