Skip to content

Commit 9538646

Browse files
committed
Implement php_mb_zend_encoding_converter using fast text conversion filters
1 parent e2654a5 commit 9538646

File tree

3 files changed: 7 additions and 178 deletions.

ext/mbstring/libmbfl/mbfl/mbfilter.c

-124
Original file line number | Diff line number | Diff line change
@@ -95,130 +95,6 @@
95 95

96 96
#include "rare_cp_bitvec.h"
97 97

98-
/*
99-
* buffering converter
100-
*/
101-
mbfl_buffer_converter *
102-
mbfl_buffer_converter_new(
103-
const mbfl_encoding *from,
104-
const mbfl_encoding *to,
105-
size_t buf_initsz)
106-
{
107-
mbfl_buffer_converter *convd = emalloc(sizeof(mbfl_buffer_converter));
108-
convd->to = to;
109-
110-
/* create convert filter */
111-
convd->filter1 = NULL;
112-
convd->filter2 = NULL;
113-
if (mbfl_convert_filter_get_vtbl(from, to) != NULL) {
114-
convd->filter1 = mbfl_convert_filter_new(from, to, mbfl_memory_device_output, NULL, &convd->device);
115-
} else {
116-
convd->filter2 = mbfl_convert_filter_new(&mbfl_encoding_wchar, to, mbfl_memory_device_output, NULL, &convd->device);
117-
if (convd->filter2 != NULL) {
118-
convd->filter1 = mbfl_convert_filter_new(from,
119-
&mbfl_encoding_wchar,
120-
(output_function_t)convd->filter2->filter_function,
121-
(flush_function_t)convd->filter2->filter_flush,
122-
convd->filter2);
123-
if (convd->filter1 == NULL) {
124-
mbfl_convert_filter_delete(convd->filter2);
125-
}
126-
}
127-
}
128-
if (convd->filter1 == NULL) {
129-
efree(convd);
130-
return NULL;
131-
}
132-
133-
mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
134-
135-
return convd;
136-
}
137-
138-
void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
139-
{
140-
mbfl_convert_filter_delete(convd->filter1);
141-
if (convd->filter2) {
142-
mbfl_convert_filter_delete(convd->filter2);
143-
}
144-
mbfl_memory_device_clear(&convd->device);
145-
efree((void*)convd);
146-
}
147-
148-
void mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
149-
{
150-
if (convd->filter2) {
151-
convd->filter2->illegal_mode = mode;
152-
} else {
153-
convd->filter1->illegal_mode = mode;
154-
}
155-
}
156-
157-
void mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, uint32_t substchar)
158-
{
159-
if (convd->filter2) {
160-
convd->filter2->illegal_substchar = substchar;
161-
} else {
162-
convd->filter1->illegal_substchar = substchar;
163-
}
164-
}
165-
166-
size_t mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
167-
{
168-
size_t n;
169-
unsigned char *p;
170-
mbfl_convert_filter *filter;
171-
172-
ZEND_ASSERT(convd);
173-
ZEND_ASSERT(string);
174-
175-
mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);
176-
/* feed data */
177-
n = string->len;
178-
p = string->val;
179-
180-
filter = convd->filter1;
181-
if (filter != NULL) {
182-
while (n > 0) {
183-
if ((*filter->filter_function)(*p++, filter) < 0) {
184-
return p - string->val;
185-
}
186-
n--;
187-
}
188-
}
189-
return p - string->val;
190-
}
191-
192-
void mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
193-
{
194-
mbfl_convert_filter_flush(convd->filter1);
195-
}
196-
197-
mbfl_string* mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
198-
{
199-
result->encoding = convd->to;
200-
return mbfl_memory_device_result(&convd->device, result);
201-
}
202-
203-
mbfl_string* mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result)
204-
{
205-
mbfl_buffer_converter_feed(convd, string);
206-
mbfl_convert_filter_flush(convd->filter1);
207-
result->encoding = convd->to;
208-
return mbfl_memory_device_result(&convd->device, result);
209-
}
210-
211-
size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
212-
{
213-
size_t num_illegalchars = convd->filter1->num_illegalchar;
214-
215-
if (convd->filter2) {
216-
num_illegalchars += convd->filter2->num_illegalchar;
217-
}
218-
219-
return num_illegalchars;
220-
}
221-
222 98
/*
223 99
* encoding detector
224 100
*/

ext/mbstring/libmbfl/mbfl/mbfilter.h

-22
Original file line number | Diff line number | Diff line change
@@ -125,28 +125,6 @@
125 125
#define MIN(a,b) ((a)<(b)?(a):(b))
126 126
#endif
127 127

128-
/*
129-
* buffering converter
130-
*/
131-
typedef struct _mbfl_buffer_converter mbfl_buffer_converter;
132-
133-
struct _mbfl_buffer_converter {
134-
mbfl_convert_filter *filter1;
135-
mbfl_convert_filter *filter2;
136-
mbfl_memory_device device;
137-
const mbfl_encoding *to;
138-
};
139-
140-
MBFLAPI extern mbfl_buffer_converter * mbfl_buffer_converter_new(const mbfl_encoding *from, const mbfl_encoding *to, size_t buf_initsz);
141-
MBFLAPI extern void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd);
142-
MBFLAPI extern void mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode);
143-
MBFLAPI extern void mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, uint32_t substchar);
144-
MBFLAPI extern size_t mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string);
145-
MBFLAPI extern void mbfl_buffer_converter_flush(mbfl_buffer_converter *convd);
146-
MBFLAPI extern mbfl_string * mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result);
147-
MBFLAPI extern mbfl_string * mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result);
148-
MBFLAPI extern size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd);
149-
150 128
/*
151 129
* encoding detector
152 130
*/

ext/mbstring/mbstring.c

+7 -32
Original file line number | Diff line number | Diff line change
@@ -452,40 +452,15 @@ static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *a
452 452

453 453
static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
454 454
{
455-
mbfl_string string, result;
456-
mbfl_buffer_converter *convd;
457-
458-
/* new encoding */
459-
/* initialize string */
460-
string.encoding = (const mbfl_encoding*)encoding_from;
461-
string.val = (unsigned char*)from;
462-
string.len = from_length;
463-
464-
/* initialize converter */
465-
convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
466-
if (convd == NULL) {
467-
return (size_t) -1;
468-
}
469-
470-
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
471-
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
472-
473-
/* do it */
474-
size_t loc = mbfl_buffer_converter_feed(convd, &string);
475-
476-
mbfl_buffer_converter_flush(convd);
477-
mbfl_string_init(&result);
478-
if (!mbfl_buffer_converter_result(convd, &result)) {
479-
mbfl_buffer_converter_delete(convd);
480-
return (size_t)-1;
481-
}
482-
483-
*to = result.val;
484-
*to_length = result.len;
455+
unsigned int num_errors = 0;
456+
zend_string *result = mb_fast_convert((unsigned char*)from, from_length, (const mbfl_encoding*)encoding_from, (const mbfl_encoding*)encoding_to, MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode), &num_errors);
485 457

486-
mbfl_buffer_converter_delete(convd);
458+
*to_length = ZSTR_LEN(result);
459+
*to = emalloc(ZSTR_LEN(result) + 1); /* Include terminating null byte */
460+
memcpy(*to, ZSTR_VAL(result), ZSTR_LEN(result) + 1);
461+
zend_string_free(result);
487 462

488-
return loc;
463+
return from_length;
489 464
}
490 465

491 466
static zend_result php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, bool persistent)

0 commit comments

Comments
 (0)