Skip to content

Commit aeccb13

Browse files
committed
Use new encoding conversion filters for mb_parse_str and php_mb_post_handler
When micro-benchmarking on relatively short ASCII strings, the new implementation was about 30% faster than the old one.
1 parent 98e5c4e commit aeccb13

File tree

2 files changed

+63
-66
lines changed

2 files changed

+63
-66
lines changed

ext/mbstring/mb_gpc.c

+33-66
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
#include "main/php_output.h"
2828
#include "ext/standard/info.h"
2929

30-
#include "php_variables.h"
3130
#include "php_globals.h"
3231
#include "rfc1867.h"
3332
#include "php_content_types.h"
@@ -173,33 +172,23 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data)
173172
/* }}} */
174173

175174
/* {{{ const mbfl_encoding *_php_mb_encoding_handler_ex() */
176-
const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res)
175+
const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *array_ptr, char *res)
177176
{
178177
char *var, *val;
179-
const char *s1, *s2;
180178
char *strtok_buf = NULL, **val_list = NULL;
181-
zval *array_ptr = (zval *) arg;
182-
size_t n, num, *len_list = NULL;
183-
size_t val_len, new_val_len;
184-
mbfl_string string, resvar, resval;
179+
size_t n, num = 1, *len_list = NULL;
180+
size_t new_val_len;
185181
const mbfl_encoding *from_encoding = NULL;
186182
mbfl_encoding_detector *identd = NULL;
187-
mbfl_buffer_converter *convd = NULL;
188-
189-
mbfl_string_init_set(&string, info->to_encoding);
190-
mbfl_string_init_set(&resvar, info->to_encoding);
191-
mbfl_string_init_set(&resval, info->to_encoding);
192183

193184
if (!res || *res == '\0') {
194185
goto out;
195186
}
196187

197-
/* count the variables(separators) contained in the "res".
198-
* separator may contain multiple separator chars.
199-
*/
200-
num = 1;
201-
for (s1=res; *s1 != '\0'; s1++) {
202-
for (s2=info->separator; *s2 != '\0'; s2++) {
188+
/* count variables contained in `res`.
189+
* the separator string is a set of separator characters; ANY one of them delimits a variable */
190+
for (char *s1 = res; *s1; s1++) {
191+
for (const char *s2 = info->separator; *s2; s2++) {
203192
if (*s1 == *s2) {
204193
num++;
205194
}
@@ -212,7 +201,6 @@ const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_i
212201

213202
/* split and decode the query */
214203
n = 0;
215-
strtok_buf = NULL;
216204
var = php_strtok_r(res, info->separator, &strtok_buf);
217205
while (var) {
218206
val = strchr(var, '=');
@@ -255,6 +243,7 @@ const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_i
255243
if (identd != NULL) {
256244
n = 0;
257245
while (n < num) {
246+
mbfl_string string;
258247
string.val = (unsigned char *)val_list[n];
259248
string.len = len_list[n];
260249
if (mbfl_encoding_detector_feed(identd, &string)) {
@@ -273,62 +262,40 @@ const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_i
273262
}
274263
}
275264

276-
convd = NULL;
277-
if (from_encoding != &mbfl_encoding_pass) {
278-
convd = mbfl_buffer_converter_new(from_encoding, info->to_encoding, 0);
279-
if (convd != NULL) {
280-
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
281-
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
282-
} else {
283-
if (info->report_errors) {
284-
php_error_docref(NULL, E_WARNING, "Unable to create converter");
285-
}
286-
goto out;
287-
}
288-
}
289-
290265
/* convert encoding */
291-
string.encoding = from_encoding;
292-
293266
n = 0;
294267
while (n < num) {
295-
string.val = (unsigned char *)val_list[n];
296-
string.len = len_list[n];
297-
if (convd != NULL && mbfl_buffer_converter_feed_result(convd, &string, &resvar) != NULL) {
298-
var = (char *)resvar.val;
299-
} else {
300-
var = val_list[n];
301-
}
302-
n++;
303-
string.val = (unsigned char *)val_list[n];
304-
string.len = len_list[n];
305-
if (convd != NULL && mbfl_buffer_converter_feed_result(convd, &string, &resval) != NULL) {
306-
val = (char *)resval.val;
307-
val_len = resval.len;
268+
if (from_encoding != &mbfl_encoding_pass && info->to_encoding != &mbfl_encoding_pass) {
269+
unsigned int num_errors = 0;
270+
zend_string *converted_var = mb_fast_convert((unsigned char*)val_list[n], len_list[n], from_encoding, info->to_encoding, MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode), &num_errors);
271+
MBSTRG(illegalchars) += num_errors;
272+
n++;
273+
274+
num_errors = 0;
275+
zend_string *converted_val = mb_fast_convert((unsigned char*)val_list[n], len_list[n], from_encoding, info->to_encoding, MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode), &num_errors);
276+
MBSTRG(illegalchars) += num_errors;
277+
n++;
278+
279+
/* `val` must be a pointer returned by `emalloc` */
280+
val = estrndup(ZSTR_VAL(converted_val), ZSTR_LEN(converted_val));
281+
if (sapi_module.input_filter(info->data_type, ZSTR_VAL(converted_var), &val, ZSTR_LEN(converted_val), &new_val_len)) {
282+
/* add variable to symbol table */
283+
php_register_variable_safe(ZSTR_VAL(converted_var), val, new_val_len, array_ptr);
284+
}
285+
zend_string_free(converted_var);
286+
zend_string_free(converted_val);
308287
} else {
309-
val = val_list[n];
310-
val_len = len_list[n];
311-
}
312-
n++;
313-
/* we need val to be emalloc()ed */
314-
val = estrndup(val, val_len);
315-
if (sapi_module.input_filter(info->data_type, var, &val, val_len, &new_val_len)) {
316-
/* add variable to symbol table */
317-
php_register_variable_safe(var, val, new_val_len, array_ptr);
288+
var = val_list[n++];
289+
val = estrndup(val_list[n], len_list[n]);
290+
if (sapi_module.input_filter(info->data_type, var, &val, len_list[n], &new_val_len)) {
291+
php_register_variable_safe(var, val, new_val_len, array_ptr);
292+
}
293+
n++;
318294
}
319295
efree(val);
320-
321-
if (convd != NULL){
322-
mbfl_string_clear(&resvar);
323-
mbfl_string_clear(&resval);
324-
}
325296
}
326297

327298
out:
328-
if (convd != NULL) {
329-
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
330-
mbfl_buffer_converter_delete(convd);
331-
}
332299
if (val_list != NULL) {
333300
efree((void *)val_list);
334301
}
+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
--TEST--
2+
mb_parse_str() error handling
3+
--EXTENSIONS--
4+
mbstring
5+
--FILE--
6+
<?php
7+
mb_internal_encoding('UTF-8');
8+
9+
$queries = array(
10+
"\x80\x80\x80",
11+
"\xFF=\xFF"
12+
);
13+
14+
foreach ($queries as $query) {
15+
echo "Query: " . bin2hex($query) . "\n";
16+
17+
$array = [];
18+
mb_parse_str($query, $array);
19+
20+
foreach ($array as $key => $value) {
21+
echo bin2hex($key) . "=>" . bin2hex($value) . "\n";
22+
}
23+
}
24+
25+
?>
26+
--EXPECT--
27+
Query: 808080
28+
3f3f3f=>
29+
Query: ff3dff
30+
3f=>3f

0 commit comments

Comments
 (0)