Skip to content

Commit 6f53dbb

Browse files
committed Jan 22, 2023
mb_scrub does not attempt to scrub known-valid UTF-8 strings
1 parent f4dd35e commit 6f53dbb

File tree

2 files changed

+16
-5
lines changed

2 files changed

+16
-5
lines changed
 

‎ext/mbstring/mbstring.c

+8-5
Original file line number | Diff line number | Diff line change
@@ -5066,12 +5066,10 @@ PHP_FUNCTION(mb_chr)
50665066
/* {{{ */
50675067
PHP_FUNCTION(mb_scrub)
50685068
{
5069-
char* str;
5070-
size_t str_len;
5071-
zend_string *enc_name = NULL;
5069+
zend_string *str, *enc_name = NULL;
50725070

50735071
ZEND_PARSE_PARAMETERS_START(1, 2)
5074-
Z_PARAM_STRING(str, str_len)
5072+
Z_PARAM_STR(str)
50755073
Z_PARAM_OPTIONAL
50765074
Z_PARAM_STR_OR_NULL(enc_name)
50775075
ZEND_PARSE_PARAMETERS_END();
@@ -5081,7 +5079,12 @@ PHP_FUNCTION(mb_scrub)
50815079
RETURN_THROWS();
50825080
}
50835081

5084-
RETURN_STR(php_mb_convert_encoding_ex(str, str_len, enc, enc));
5082+
if (enc == &mbfl_encoding_utf8 && (GC_FLAGS(str) & IS_STR_VALID_UTF8)) {
5083+
/* A valid UTF-8 string will not be changed by mb_scrub; so just increment the refcount and return it */
5084+
RETURN_STR_COPY(str);
5085+
}
5086+
5087+
RETURN_STR(php_mb_convert_encoding_ex(ZSTR_VAL(str), ZSTR_LEN(str), enc, enc));
50855088
}
50865089
/* }}} */
50875090

‎ext/mbstring/tests/mb_scrub.phpt

+8
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,15 @@ var_dump(
88
"?" === mb_scrub("\x80"),
99
"?" === mb_scrub("\x80", 'UTF-8')
1010
);
11+
12+
$utf8str = "abc 日本語 Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞";
13+
// Check $utf8str so it is marked as 'valid UTF-8'
14+
// This will enable optimized implementation of mb_scrub
15+
if (!mb_check_encoding($utf8str, 'UTF-8'))
16+
die("Test string should be valid UTF-8");
17+
var_dump(mb_scrub($utf8str));
1118
?>
1219
--EXPECT--
1320
bool(true)
1421
bool(true)
22+
string(122) "abc 日本語 Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞"

0 commit comments

Comments
 (0)
Please sign in to comment.