Skip to content

Commit f3c8efd

Browse files
committed Aug 16, 2022
In legacy text conversion filters, reset filter state in 'flush' function
Up until now, I believed that mbstring had been designed such that (legacy) text conversion filter objects should not be re-used after the 'flush' function is called to complete a text conversion operation. However, it turns out that the implementation of _php_mb_encoding_handler_ex DID re-use filter objects after flush. That means that functions which were based on _php_mb_encoding_handler_ex, including mb_parse_str and php_mb_post_handler, would break in some cases: state left over from converting one substring (perhaps a variable name) would affect the results of converting another substring (perhaps the value of the same variable), and could cause extraneous characters to get inserted into the output. All of this legacy filter code should be deleted soon, but fixing it helps me to avoid spurious failures when fuzzing the new and old code to look for differences in behavior.
1 parent 18e526c commit f3c8efd

28 files changed

+35
-2
lines changed
 

‎ext/mbstring/libmbfl/filters/mbfilter_big5.c

+1
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ static int mbfl_filt_conv_big5_wchar_flush(mbfl_convert_filter *filter)
257257
{
258258
if (filter->status == 1) {
259259
/* 2-byte character was truncated */
260+
filter->status = 0;
260261
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
261262
}
262263

‎ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c

+2 -1
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,7 @@ static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter)
322322
* escape sequence was truncated */
323323
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
324324
}
325+
filter->status = 0;
325326

326327
if (filter->flush_function) {
327328
(*filter->flush_function)(filter->data);
@@ -824,7 +825,7 @@ static int mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter)
824825
CK((*filter->output_function)(0x28, filter->data)); /* '(' */
825826
CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
826827
}
827-
filter->status &= 0xff;
828+
filter->status = 0;
828829

829830
if (filter->flush_function) {
830831
(*filter->flush_function)(filter->data);

‎ext/mbstring/libmbfl/filters/mbfilter_cp51932.c

+1
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ static int mbfl_filt_conv_cp51932_wchar_flush(mbfl_convert_filter *filter)
178178
if (filter->status) {
179179
/* Input string was truncated */
180180
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
181+
filter->status = 0;
181182
}
182183

183184
if (filter->flush_function) {

‎ext/mbstring/libmbfl/filters/mbfilter_cp932.c

+1
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter)
217217
{
218218
if (filter->status) {
219219
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
220+
filter->status = 0;
220221
}
221222

222223
if (filter->flush_function) {

‎ext/mbstring/libmbfl/filters/mbfilter_cp936.c

+1
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ static int mbfl_filt_conv_cp936_wchar_flush(mbfl_convert_filter *filter)
166166
{
167167
if (filter->status) {
168168
/* 2-byte character was truncated */
169+
filter->status = 0;
169170
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
170171
}
171172

‎ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c

+1
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ static int mbfl_filt_conv_euccn_wchar_flush(mbfl_convert_filter *filter)
209209
{
210210
if (filter->status == 1) {
211211
/* 2-byte character was truncated */
212+
filter->status = 0;
212213
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
213214
}
214215

‎ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c

+1
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter)
180180
{
181181
if (filter->status) {
182182
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
183+
filter->status = 0;
183184
}
184185

185186
if (filter->flush_function) {

‎ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c

+1
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ static int mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter)
226226
{
227227
if (filter->status) {
228228
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
229+
filter->status = 0;
229230
}
230231

231232
if (filter->flush_function) {

‎ext/mbstring/libmbfl/filters/mbfilter_euc_kr.c

+1
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ static int mbfl_filt_conv_euckr_wchar_flush(mbfl_convert_filter *filter)
193193
{
194194
if (filter->status == 1) {
195195
/* 2-byte character was truncated */
196+
filter->status = 0;
196197
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
197198
}
198199

‎ext/mbstring/libmbfl/filters/mbfilter_euc_tw.c

+1
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ static int mbfl_filt_conv_euctw_wchar_flush(mbfl_convert_filter *filter)
245245
{
246246
if (filter->status) {
247247
/* 2-byte or 4-byte character was truncated */
248+
filter->status = 0;
248249
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
249250
}
250251

‎ext/mbstring/libmbfl/filters/mbfilter_gb18030.c

+1
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ static int mbfl_filt_conv_gb18030_wchar_flush(mbfl_convert_filter *filter)
231231
{
232232
if (filter->status) {
233233
/* multi-byte character was truncated */
234+
filter->status = 0;
234235
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
235236
}
236237

‎ext/mbstring/libmbfl/filters/mbfilter_hz.c

+2
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ static int mbfl_filt_conv_hz_wchar_flush(mbfl_convert_filter *filter)
154154
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
155155
}
156156

157+
filter->status = 0;
158+
157159
if (filter->flush_function) {
158160
(*filter->flush_function)(filter->data);
159161
}

‎ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c

+2
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ static int mbfl_filt_conv_2022jpms_wchar_flush(mbfl_convert_filter *filter)
219219
if (filter->status & 0xF) {
220220
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
221221
}
222+
filter->status = 0;
222223

223224
if (filter->flush_function) {
224225
(*filter->flush_function)(filter->data);
@@ -354,6 +355,7 @@ int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter)
354355
CK((*filter->output_function)('(', filter->data));
355356
CK((*filter->output_function)('B', filter->data));
356357
}
358+
filter->status = 0;
357359

358360
if (filter->flush_function) {
359361
(*filter->flush_function)(filter->data);

‎ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c

+1
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ static int mbfl_filt_conv_2022kr_wchar_flush(mbfl_convert_filter *filter)
178178
/* 2-byte character was truncated */
179179
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
180180
}
181+
filter->status = 0;
181182

182183
if (filter->flush_function) {
183184
(*filter->flush_function)(filter->data);

‎ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c

+2
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter)
313313
if (filter->status & 0xF) {
314314
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
315315
}
316+
filter->status = 0;
316317

317318
if (filter->flush_function) {
318319
(*filter->flush_function)(filter->data);
@@ -483,6 +484,7 @@ static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter)
483484
if ((filter->status & 0xFF) == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) {
484485
(*filter->output_function)(c1, filter->data);
485486
}
487+
filter->status = filter->cache = 0;
486488

487489
if (filter->flush_function) {
488490
(*filter->flush_function)(filter->data);

‎ext/mbstring/libmbfl/filters/mbfilter_jis.c

+2 -1
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter)
271271
* or else escape sequence was truncated */
272272
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
273273
}
274+
filter->status = 0;
274275

275276
if (filter->flush_function) {
276277
(*filter->flush_function)(filter->data);
@@ -451,7 +452,7 @@ mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter)
451452
CK((*filter->output_function)(0x28, filter->data)); /* '(' */
452453
CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
453454
}
454-
filter->status &= 0xff;
455+
filter->status = 0;
455456

456457
if (filter->flush_function != NULL) {
457458
return (*filter->flush_function)(filter->data);

‎ext/mbstring/libmbfl/filters/mbfilter_sjis.c

+1
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter)
183183
{
184184
if (filter->status) {
185185
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
186+
filter->status = 0;
186187
}
187188

188189
if (filter->flush_function) {

‎ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c

+1
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,7 @@ int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter)
491491
if (filter->status & 0xF) {
492492
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
493493
}
494+
filter->status = 0;
494495

495496
if (filter->flush_function) {
496497
return (*filter->flush_function)(filter->data);

‎ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c

+1
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter)
266266
static int mbfl_filt_conv_sjis_mac_wchar_flush(mbfl_convert_filter *filter)
267267
{
268268
if (filter->status == 1) {
269+
filter->status = 0;
269270
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
270271
}
271272

‎ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c

+2
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,7 @@ static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter)
722722
if (filter->status && filter->status != 4) {
723723
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
724724
}
725+
filter->status = 0;
725726

726727
if (filter->flush_function) {
727728
(*filter->flush_function)(filter->data);
@@ -826,6 +827,7 @@ int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
826827
{
827828
int c1 = filter->cache;
828829
if (filter->status == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) {
830+
filter->cache = filter->status = 0;
829831
CK((*filter->output_function)(c1, filter->data));
830832
} else if (filter->status == 2) {
831833
/* First of a pair of Regional Indicator codepoints came at the end of a string */

‎ext/mbstring/libmbfl/filters/mbfilter_ucs2.c

+1
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ static int mbfl_filt_conv_ucs2_wchar_flush(mbfl_convert_filter *filter)
218218
{
219219
if (filter->status) {
220220
/* Input string was truncated */
221+
filter->status = 0;
221222
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
222223
}
223224

‎ext/mbstring/libmbfl/filters/mbfilter_ucs4.c

+1
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ static int mbfl_filt_conv_ucs4_wchar_flush(mbfl_convert_filter *filter)
301301
/* Input string was truncated */
302302
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
303303
}
304+
filter->status = 0;
304305

305306
if (filter->flush_function) {
306307
(*filter->flush_function)(filter->data);

‎ext/mbstring/libmbfl/filters/mbfilter_uhc.c

+1
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ static int mbfl_filt_conv_uhc_wchar_flush(mbfl_convert_filter *filter)
147147
{
148148
if (filter->status == 1) {
149149
/* 2-byte character was truncated */
150+
filter->status = 0;
150151
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
151152
}
152153

‎ext/mbstring/libmbfl/filters/mbfilter_utf16.c

+1
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter)
323323
{
324324
if (filter->status) {
325325
/* Input string was truncated */
326+
filter->status = 0;
326327
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
327328
}
328329

‎ext/mbstring/libmbfl/filters/mbfilter_utf32.c

+1
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ static int mbfl_filt_conv_utf32_wchar_flush(mbfl_convert_filter *filter)
233233
/* Input string was truncated */
234234
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
235235
}
236+
filter->cache = filter->status = 0;
236237

237238
if (filter->flush_function) {
238239
(*filter->flush_function)(filter->data);

‎ext/mbstring/libmbfl/filters/mbfilter_utf7.c

+2
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ static int mbfl_filt_conv_utf7_wchar_flush(mbfl_convert_filter *filter)
267267
if (filter->cache) {
268268
/* Either we were expecting the 2nd half of a surrogate pair which
269269
* never came, or else the last Base64 data was not padded with zeroes */
270+
filter->cache = 0;
270271
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
271272
}
272273

@@ -373,6 +374,7 @@ int mbfl_filt_conv_wchar_utf7_flush(mbfl_convert_filter *filter)
373374
{
374375
int status = filter->status;
375376
int cache = filter->cache;
377+
filter->status = filter->cache = 0;
376378

377379
/* flush fragments */
378380
switch (status) {

‎ext/mbstring/libmbfl/filters/mbfilter_utf7imap.c

+1
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ static int mbfl_filt_conv_utf7imap_wchar_flush(mbfl_convert_filter *filter)
287287
/* It is illegal for a UTF-7 IMAP string to end in a Base-64 encoded
288288
* section. It should always change back to ASCII before the end. */
289289
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
290+
filter->status = 0;
290291
}
291292

292293
if (filter->flush_function) {

‎ext/mbstring/libmbfl/filters/mbfilter_utf8.c

+1
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ int mbfl_filt_conv_utf8_wchar_flush(mbfl_convert_filter *filter)
180180
{
181181
if (filter->status) {
182182
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
183+
filter->status = 0;
183184
}
184185

185186
if (filter->flush_function) {

0 commit comments

Comments
 (0)