Skip to content

Commit e2654a5

Browse files
committed Jan 3, 2023
Mark default interned strings as valid UTF-8 where appropriate
The valid-UTF-8 flag is used by the pcre extension, and various functions in mbstring are also faster if the input strings are known to be valid UTF-8. We might as well mark the default interned strings (which are initialized when PHP starts up) as valid UTF-8 where appropriate.
1 parent 88c99af commit e2654a5

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed
 

‎Zend/zend_string.c

+5
Original file line number | Diff line number | Diff line change
@@ -102,18 +102,23 @@ ZEND_API void zend_interned_strings_init(void)
102102
str = zend_string_alloc(sizeof("")-1, 1);
103103
ZSTR_VAL(str)[0] = '\000';
104104
zend_empty_string = zend_new_interned_string_permanent(str);
105+
GC_ADD_FLAGS(zend_empty_string, IS_STR_VALID_UTF8);
105106

106107
s[1] = 0;
107108
for (i = 0; i < 256; i++) {
108109
s[0] = i;
109110
zend_one_char_string[i] = zend_new_interned_string_permanent(zend_string_init(s, 1, 1));
111+
if (i < 0x80) {
112+
GC_ADD_FLAGS(zend_one_char_string[i], IS_STR_VALID_UTF8);
113+
}
110114
}
111115

112116
/* known strings */
113117
zend_known_strings = pemalloc(sizeof(zend_string*) * ((sizeof(known_strings) / sizeof(known_strings[0]) - 1)), 1);
114118
for (i = 0; i < (sizeof(known_strings) / sizeof(known_strings[0])) - 1; i++) {
115119
str = zend_string_init(known_strings[i], strlen(known_strings[i]), 1);
116120
zend_known_strings[i] = zend_new_interned_string_permanent(str);
121+
GC_ADD_FLAGS(zend_known_strings[i], IS_STR_VALID_UTF8);
117122
}
118123
}
119124

0 commit comments

Comments
 (0)