Skip to content

Commit e1351eb

Browse files
committed Aug 2, 2022
Fix legacy text conversion filter for UTF-16
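Make the necessary changes to the filter state before using the CK macro, which returns early when the output function reports an error and would otherwise skip those state changes.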
1 parent 219fff3 commit e1351eb

File tree

1 file changed

+11
-12
lines changed

1 file changed

+11
-12
lines changed
 

‎ext/mbstring/libmbfl/filters/mbfilter_utf16.c

+11-12
Original file line number | Diff line number | Diff line change
@@ -149,10 +149,10 @@ int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
149149
filter->status = 1;
150150
} else {
151151
int n = (filter->cache << 8) | (c & 0xFF);
152+
filter->cache = filter->status = 0;
152153
if (n == 0xFFFE) {
153154
/* Switch to little-endian mode */
154155
filter->filter_function = mbfl_filt_conv_utf16le_wchar;
155-
filter->cache = filter->status = 0;
156156
} else {
157157
filter->filter_function = mbfl_filt_conv_utf16be_wchar;
158158
if (n >= 0xD800 && n <= 0xDBFF) {
@@ -165,7 +165,6 @@ int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
165165
} else if (n != 0xFEFF) {
166166
CK((*filter->output_function)(n, filter->data));
167167
}
168-
filter->cache = filter->status = 0;
169168
}
170169
}
171170

@@ -189,11 +188,11 @@ int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
189188
filter->status = 2;
190189
} else if (n >= 0xDC00 && n <= 0xDFFF) {
191190
/* This is wrong; second part of surrogate pair has come first */
192-
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
193191
filter->status = 0;
192+
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
194193
} else {
195-
CK((*filter->output_function)(n, filter->data));
196194
filter->status = 0;
195+
CK((*filter->output_function)(n, filter->data));
197196
}
198197
break;
199198

@@ -206,17 +205,17 @@ int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
206205
n = ((filter->cache & 0xFF) << 8) | (c & 0xFF);
207206
if (n >= 0xD800 && n <= 0xDBFF) {
208207
/* Wrong; that's the first half of a surrogate pair, not the second */
209-
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
210208
filter->cache = n & 0x3FF;
211209
filter->status = 2;
210+
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
212211
} else if (n >= 0xDC00 && n <= 0xDFFF) {
212+
filter->status = 0;
213213
n = ((filter->cache & 0x3FF00) << 2) + (n & 0x3FF) + 0x10000;
214214
CK((*filter->output_function)(n, filter->data));
215-
filter->status = 0;
216215
} else {
216+
filter->status = 0;
217217
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
218218
CK((*filter->output_function)(n, filter->data));
219-
filter->status = 0;
220219
}
221220
}
222221

@@ -261,11 +260,11 @@ int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
261260
filter->status = 2;
262261
} else if ((c & 0xfc) == 0xdc) {
263262
/* This is wrong; the second part of the surrogate pair has come first */
264-
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
265263
filter->status = 0;
264+
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
266265
} else {
267-
CK((*filter->output_function)(filter->cache + ((c & 0xff) << 8), filter->data));
268266
filter->status = 0;
267+
CK((*filter->output_function)(filter->cache + ((c & 0xff) << 8), filter->data));
269268
}
270269
break;
271270

@@ -279,19 +278,19 @@ int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
279278
if (n >= 0xD800 && n <= 0xDBFF) {
280279
/* We previously saw the first part of a surrogate pair and were
281280
* expecting the second part; this is another first part */
282-
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
283281
filter->cache = n & 0x3FF;
284282
filter->status = 2;
283+
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
285284
} else if (n >= 0xDC00 && n <= 0xDFFF) {
286285
n = filter->cache + ((c & 0x3) << 8) + 0x10000;
287-
CK((*filter->output_function)(n, filter->data));
288286
filter->status = 0;
287+
CK((*filter->output_function)(n, filter->data));
289288
} else {
290289
/* The first part of a surrogate pair was followed by some other codepoint
291290
* which is not part of a surrogate pair at all */
291+
filter->status = 0;
292292
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
293293
CK((*filter->output_function)(n, filter->data));
294-
filter->status = 0;
295294
}
296295
break;
297296
}

0 commit comments

Comments
 (0)
Please sign in to comment.