Skip to content

Commit 3fa836f

Browse files
committed
Add test cases for mb_strcut
1 parent c8e1fc0 commit 3fa836f

File tree

2 files changed

+315
-13
lines changed

2 files changed

+315
-13
lines changed

ext/mbstring/tests/bug49354.phpt

+4 -4
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,11 @@ Bug #49354 (mb_strcut() cuts wrong length when offset is in the middle of a mult
44
mbstring
55
--FILE--
66
<?php
7-
$crap = 'AåBäCöDü';
7+
$crap = 'AåBäCöDü'; // sequence of 1-byte and 2-byte chars: [41 c3a5 42 c3a4 43 c3b6 44 c3bc]
88
var_dump(mb_strcut($crap, 0, 100, 'UTF-8'));
9-
var_dump(mb_strcut($crap, 1, 100, 'UTF-8'));
10-
var_dump(mb_strcut($crap, 2, 100, 'UTF-8'));
11-
var_dump(mb_strcut($crap, 3, 100, 'UTF-8'));
9+
var_dump(mb_strcut($crap, 1, 100, 'UTF-8')); // skip over A
10+
var_dump(mb_strcut($crap, 2, 100, 'UTF-8')); // cut in middle of å
11+
var_dump(mb_strcut($crap, 3, 100, 'UTF-8')); // skip over Aå
1212
var_dump(mb_strcut($crap, 12, 100, 'UTF-8'));
1313
var_dump(mb_strcut($crap, 13, 100, 'UTF-8'));
1414

ext/mbstring/tests/mb_strcut.phpt

+311 -9
Original file line number | Diff line number | Diff line change
@@ -16,22 +16,26 @@ function MBStringChars($string, $encoding) {
1616
ini_set('include_path', __DIR__);
1717
include_once('common.inc');
1818

19-
// EUC-JP
2019
$euc_jp = pack('H*', '30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3');
21-
// UTF-8
22-
$utf8 = pack('H*', 'e288ae2045e28b856461203d2051'); // has 2 multi-byte characters: [e288ae 20 45 e28b85 64 61 20 3d 20 51]
23-
// UTF-16LE
20+
$utf8 = pack('H*', 'e288ae2045e28b856461203d2051'); // has 2 multi-byte characters: [e288ae 20 45 e28b85 64 61 20 3d 20 51]
2421
$utf16le = pack('H*', '1a043804400438043b043b04380446043004200069007300200043007900720069006c006c0069006300');
22+
$utf32be = mb_convert_encoding($utf8, 'UTF-32BE', 'UTF-8');
23+
$iso2022jp = mb_convert_encoding("漢字 abc カナ", 'ISO-2022-JP', 'UTF-8'); // [<escape sequence>1b2442 3441 3b7a <escape sequence>1b2842 20 61 62 63 20 <escape sequence>1b2442 252b 254a <escape sequence>1b2842]
24+
$jis = mb_convert_encoding("漢字 abc カナ", 'JIS', 'UTF-8');
25+
// For testing ISO-2022-JP-2004, add a Kanji character which is in JISX 0213
26+
$iso2022jp2004 = mb_convert_encoding("漢字 abc カナ凜", 'ISO-2022-JP-2004', 'UTF-8'); // [1b242851 3441 3b7a 1b2842 20 61 62 63 20 1b242851 252b 254a 7425 1b2842]
27+
$iso2022jpms = mb_convert_encoding("漢字 abc カナ", 'ISO-2022-JP-MS', 'UTF-8'); // [1b2442 3441 3b7a 1b2842 20 61 62 63 20 1b2442 252b 254a 1b2842]
28+
$iso2022jp_kddi = mb_convert_encoding("漢字 abc カナ", 'ISO-2022-JP-KDDI', 'UTF-8');
2529

2630
print "== EUC-JP ==\n";
27-
print MBStringChars(mb_strcut($euc_jp, 6, 5,'EUC-JP'), 'EUC-JP') . "\n";
28-
print MBStringChars(mb_strcut($euc_jp, 5, 5,'EUC-JP'), 'EUC-JP') . "\n";
29-
print MBStringChars(mb_strcut($euc_jp, 0, 100,'EUC-JP'), 'EUC-JP') . "\n";
31+
print MBStringChars(mb_strcut($euc_jp, 6, 5, 'EUC-JP'), 'EUC-JP') . "\n";
32+
print MBStringChars(mb_strcut($euc_jp, 5, 5, 'EUC-JP'), 'EUC-JP') . "\n";
33+
print MBStringChars(mb_strcut($euc_jp, 0, 100, 'EUC-JP'), 'EUC-JP') . "\n";
3034

31-
$str = mb_strcut($euc_jp, 100, 10,'EUC-JP');
35+
$str = mb_strcut($euc_jp, 100, 10, 'EUC-JP');
3236
($str === "") ? print "OK\n" : print "No good\n";
3337

34-
$str = mb_strcut($euc_jp, -100, 10,'EUC-JP');
38+
$str = mb_strcut($euc_jp, -100, 10, 'EUC-JP');
3539
($str !== "") ? print "OK\n" : print "No good\n";
3640

3741
print "== UTF-8 ==\n";
@@ -45,6 +49,17 @@ print MBStringChars(mb_strcut($utf8, 1, 2, 'UTF-8'), 'UTF-8') . "\n";
4549
print MBStringChars(mb_strcut($utf8, 1, 3, 'UTF-8'), 'UTF-8') . "\n";
4650
print MBStringChars(mb_strcut($utf8, 1, 4, 'UTF-8'), 'UTF-8') . "\n";
4751

52+
print MBStringChars(mb_strcut('AåBäCöDü', 2, 100, 'UTF-8'), 'UTF-8') . "\n";
53+
54+
print "== UTF-16 ==\n";
55+
print "Single byte: [" . bin2hex(mb_strcut("\xFF", 0, 100, 'UTF-16')) . "]\n";
56+
print "With from=1: [" . bin2hex(mb_strcut("\xff\x01", 1, 100, "UTF-16")) . "]\n";
57+
print "Bad surrogate: [" . bin2hex(mb_strcut("\xD9\xFF", 0, 100, "UTF-16")) . "]\n";
58+
print "Bad surrogate followed by other bytes: [" . bin2hex(mb_strcut("\xd9\x00\x12C", 0, 100, "UTF-16")) . "]\n";
59+
print "BE byte order mark: [" . bin2hex(mb_strcut("\xFE\xFF", 0, 100, "UTF-16")) . "]\n";
60+
print "LE byte order mark: [" . bin2hex(mb_strcut("\xFF\xFE", 0, 100, "UTF-16")) . "]\n";
61+
print "Length=0: [" . bin2hex(mb_strcut("\x00\x01\x00\x00", 1, -512, "UTF-16")) . "]\n";
62+
4863
print "== UTF-16LE ==\n";
4964
print MBStringChars(mb_strcut($utf16le, 0, 0, 'UTF-16LE'), 'UTF-16LE') . "\n";
5065
print MBStringChars(mb_strcut($utf16le, 0, 1, 'UTF-16LE'), 'UTF-16LE') . "\n";
@@ -54,6 +69,162 @@ print MBStringChars(mb_strcut($utf16le, 1, 2, 'UTF-16LE'), 'UTF-16LE') . "\n";
5469
print MBStringChars(mb_strcut($utf16le, 1, 3, 'UTF-16LE'), 'UTF-16LE') . "\n";
5570
print MBStringChars(mb_strcut($utf16le, 1, 4, 'UTF-16LE'), 'UTF-16LE') . "\n";
5671

72+
print "Single byte: [" . bin2hex(mb_strcut("\xFF", 0, 100, 'UTF-16LE')) . "]\n";
73+
74+
print "== UTF-32BE ==\n";
75+
print MBStringChars(mb_strcut($utf32be, 0, 3, 'UTF-32BE'), 'UTF-32BE') . "\n";
76+
print MBStringChars(mb_strcut($utf32be, 0, 4, 'UTF-32BE'), 'UTF-32BE') . "\n";
77+
print MBStringChars(mb_strcut($utf32be, 0, 5, 'UTF-32BE'), 'UTF-32BE') . "\n";
78+
print MBStringChars(mb_strcut($utf32be, 1, 8, 'UTF-32BE'), 'UTF-32BE') . "\n";
79+
print MBStringChars(mb_strcut($utf32be, 3, 9, 'UTF-32BE'), 'UTF-32BE') . "\n";
80+
81+
print "== ISO-2022-JP ==\n";
82+
print MBStringChars(mb_strcut($iso2022jp, 0, 3, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
83+
print MBStringChars(mb_strcut($iso2022jp, 0, 4, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
84+
print MBStringChars(mb_strcut($iso2022jp, 0, 5, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
85+
print MBStringChars(mb_strcut($iso2022jp, 0, 6, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
86+
print MBStringChars(mb_strcut($iso2022jp, 0, 7, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
87+
print MBStringChars(mb_strcut($iso2022jp, 0, 8, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
88+
89+
print MBStringChars(mb_strcut($iso2022jp, 1, 3, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
90+
print MBStringChars(mb_strcut($iso2022jp, 1, 6, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
91+
print MBStringChars(mb_strcut($iso2022jp, 1, 8, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
92+
93+
print MBStringChars(mb_strcut($iso2022jp, 2, 5, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
94+
print MBStringChars(mb_strcut($iso2022jp, 5, 9, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
95+
print MBStringChars(mb_strcut($iso2022jp, 5, 11, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
96+
print MBStringChars(mb_strcut($iso2022jp, 6, 13, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
97+
print MBStringChars(mb_strcut($iso2022jp, 7, 13, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
98+
99+
print MBStringChars(mb_strcut($iso2022jp, 1, 100, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
100+
print MBStringChars(mb_strcut($iso2022jp, 50, 100, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
101+
102+
print "Error followed by ASCII char: [" . bin2hex(mb_strcut("\xdaK", 0, 100, "ISO-2022-JP")) . "]\n";
103+
104+
print "== ISO-2022-JP-2004 ==\n";
105+
print MBStringChars(mb_strcut($iso2022jp2004, 0, 3, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
106+
print MBStringChars(mb_strcut($iso2022jp2004, 0, 4, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
107+
print MBStringChars(mb_strcut($iso2022jp2004, 0, 5, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
108+
print MBStringChars(mb_strcut($iso2022jp2004, 0, 6, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
109+
print MBStringChars(mb_strcut($iso2022jp2004, 0, 7, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
110+
print MBStringChars(mb_strcut($iso2022jp2004, 0, 8, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
111+
print MBStringChars(mb_strcut($iso2022jp2004, 0, 9, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
112+
113+
print MBStringChars(mb_strcut($iso2022jp2004, 1, 3, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
114+
print MBStringChars(mb_strcut($iso2022jp2004, 1, 6, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
115+
print MBStringChars(mb_strcut($iso2022jp2004, 1, 8, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
116+
print MBStringChars(mb_strcut($iso2022jp2004, 1, 9, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
117+
118+
print MBStringChars(mb_strcut($iso2022jp2004, 2, 5, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
119+
print MBStringChars(mb_strcut($iso2022jp2004, 5, 9, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
120+
print MBStringChars(mb_strcut($iso2022jp2004, 5, 11, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
121+
print MBStringChars(mb_strcut($iso2022jp2004, 6, 13, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
122+
print MBStringChars(mb_strcut($iso2022jp2004, 7, 13, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
123+
124+
print MBStringChars(mb_strcut($iso2022jp2004, 1, 100, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
125+
print MBStringChars(mb_strcut($iso2022jp2004, 50, 100, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
126+
127+
print "== ISO-2022-JP-MS ==\n";
128+
print MBStringChars(mb_strcut($iso2022jpms, 0, 3, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
129+
print MBStringChars(mb_strcut($iso2022jpms, 0, 4, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
130+
print MBStringChars(mb_strcut($iso2022jpms, 0, 5, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
131+
print MBStringChars(mb_strcut($iso2022jpms, 0, 6, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
132+
print MBStringChars(mb_strcut($iso2022jpms, 0, 7, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
133+
print MBStringChars(mb_strcut($iso2022jpms, 0, 8, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
134+
print MBStringChars(mb_strcut($iso2022jpms, 0, 9, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
135+
136+
print MBStringChars(mb_strcut($iso2022jpms, 1, 3, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
137+
print MBStringChars(mb_strcut($iso2022jpms, 1, 6, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
138+
print MBStringChars(mb_strcut($iso2022jpms, 1, 8, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
139+
print MBStringChars(mb_strcut($iso2022jpms, 1, 9, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
140+
141+
print MBStringChars(mb_strcut($iso2022jpms, 2, 5, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
142+
print MBStringChars(mb_strcut($iso2022jpms, 5, 9, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
143+
print MBStringChars(mb_strcut($iso2022jpms, 5, 11, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
144+
print MBStringChars(mb_strcut($iso2022jpms, 6, 13, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
145+
print MBStringChars(mb_strcut($iso2022jpms, 7, 13, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
146+
147+
print MBStringChars(mb_strcut($iso2022jpms, 1, 100, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
148+
print MBStringChars(mb_strcut($iso2022jpms, 50, 100, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
149+
150+
print "== JIS ==\n";
151+
print MBStringChars(mb_strcut($jis, 0, 3, 'JIS'), 'JIS') . "\n";
152+
print MBStringChars(mb_strcut($jis, 0, 4, 'JIS'), 'JIS') . "\n";
153+
print MBStringChars(mb_strcut($jis, 0, 5, 'JIS'), 'JIS') . "\n";
154+
print MBStringChars(mb_strcut($jis, 0, 6, 'JIS'), 'JIS') . "\n";
155+
print MBStringChars(mb_strcut($jis, 0, 7, 'JIS'), 'JIS') . "\n";
156+
print MBStringChars(mb_strcut($jis, 0, 8, 'JIS'), 'JIS') . "\n";
157+
158+
print MBStringChars(mb_strcut($jis, 1, 3, 'JIS'), 'JIS') . "\n";
159+
print MBStringChars(mb_strcut($jis, 1, 6, 'JIS'), 'JIS') . "\n";
160+
print MBStringChars(mb_strcut($jis, 1, 8, 'JIS'), 'JIS') . "\n";
161+
162+
print MBStringChars(mb_strcut($jis, 2, 5, 'JIS'), 'JIS') . "\n";
163+
print MBStringChars(mb_strcut($jis, 5, 9, 'JIS'), 'JIS') . "\n";
164+
print MBStringChars(mb_strcut($jis, 5, 11, 'JIS'), 'JIS') . "\n";
165+
print MBStringChars(mb_strcut($jis, 6, 13, 'JIS'), 'JIS') . "\n";
166+
print MBStringChars(mb_strcut($jis, 7, 13, 'JIS'), 'JIS') . "\n";
167+
168+
print MBStringChars(mb_strcut($jis, 1, 100, 'JIS'), 'JIS') . "\n";
169+
print MBStringChars(mb_strcut($jis, 50, 100, 'JIS'), 'JIS') . "\n";
170+
171+
print "0xA3: [" . bin2hex(mb_strcut("\xA3aaaaaa", 0, 100, 'JIS')) . "]\n";
172+
print "Bad escape sequence followed by null byte: [" . bin2hex(mb_strcut("\x1b\x00", 1, 100, "JIS")) . "]\n";
173+
174+
print "== ISO-2022-JP-KDDI ==\n";
175+
print MBStringChars(mb_strcut($iso2022jp_kddi, 0, 3, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
176+
print MBStringChars(mb_strcut($iso2022jp_kddi, 0, 4, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
177+
print MBStringChars(mb_strcut($iso2022jp_kddi, 0, 5, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
178+
print MBStringChars(mb_strcut($iso2022jp_kddi, 0, 6, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
179+
print MBStringChars(mb_strcut($iso2022jp_kddi, 0, 7, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
180+
print MBStringChars(mb_strcut($iso2022jp_kddi, 0, 8, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
181+
182+
print MBStringChars(mb_strcut($iso2022jp_kddi, 1, 3, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
183+
print MBStringChars(mb_strcut($iso2022jp_kddi, 1, 6, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
184+
print MBStringChars(mb_strcut($iso2022jp_kddi, 1, 8, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
185+
186+
print MBStringChars(mb_strcut($iso2022jp_kddi, 2, 5, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
187+
print MBStringChars(mb_strcut($iso2022jp_kddi, 5, 9, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
188+
print MBStringChars(mb_strcut($iso2022jp_kddi, 5, 11, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
189+
print MBStringChars(mb_strcut($iso2022jp_kddi, 6, 13, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
190+
print MBStringChars(mb_strcut($iso2022jp_kddi, 7, 13, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
191+
192+
print MBStringChars(mb_strcut($iso2022jp_kddi, 1, 100, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
193+
print MBStringChars(mb_strcut($iso2022jp_kddi, 50, 100, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
194+
195+
print "== CP50220 ==\n";
196+
197+
print "Single byte 0xFF: [" . bin2hex(mb_strcut("\xFF", 0, 100, 'CP50220')) . "]\n";
198+
print "Double byte 0xFF: [" . bin2hex(mb_strcut("\xFF\xFF", 0, 100, 'CP50220')) . "]\n";
199+
print "Sample string with multiple null bytes: [" . bin2hex(mb_strcut("\xCF\x00\x00\x00\x00\x00d\x00\x00", 0, 100, 'CP50220')) . "]\n";
200+
print "Bad escape sequence preceded by bad bytes: [" . bin2hex(mb_strcut("\xFF\xFF\x1B\x00", 0, 100, 'CP50220')) . "]\n";
201+
print "Good JISX 0208 sequence, but it won't fit in max number of bytes: [" . bin2hex(mb_strcut("\x1B\$BGV\x17", 0, 100, 'CP50220')) . "]\n";
202+
print "Bad escape sequence followed by GR kana: [" . bin2hex(mb_strcut("\x1B\$\xAC\x13", 0, 100, 'CP50220')) . "]\n";
203+
204+
print "== UTF-7 ==\n";
205+
206+
print "Single byte 0x01: [" . mb_strcut("\x01", 0, 100, 'UTF-7') . "]\n";
207+
print "UTF-16 section ends abruptly: [" . mb_strcut("+Q", 1, 100, 'UTF-7') . "]\n";
208+
print "UTF-16 section ends abruptly in middle of 2nd codepoint: [" . mb_strcut("+QxxC", 0, 100, 'UTF-7') . "]\n";
209+
print "Cutting in middle of UTF-16 section: [" . mb_strcut("+UUU", -1, 255, "UTF-7") . "]\n";
210+
print "Cutting in middle of UTF-16 section (2): [" . mb_strcut("+UUUU", -2, 255, "UTF-7") . "]\n";
211+
212+
print "== UTF7-IMAP ==\n";
213+
214+
print "Single byte 0x01: [" . mb_strcut("\x01", 0, 100, 'UTF7-IMAP') . "]\n";
215+
print "UTF-16 section ends abruptly: [" . mb_strcut("&Q", 1, 100, 'UTF7-IMAP') . "]\n";
216+
print "UTF-16 section ends abruptly in middle of 2nd codepoint: [" . mb_strcut("&QxxC", 0, 100, 'UTF7-IMAP') . "]\n";
217+
print "UTF-16 section is terminated improperly: [" . mb_strcut("&i6o\x83", 0, 100, 'UTF7-IMAP') . "]\n";
218+
219+
print "== GB18030 ==\n";
220+
221+
print "Invalid byte 0xF5: [" . bin2hex(mb_strcut("\xF5a", 1, 100, 'GB18030')) . "]\n";
222+
print "Double-byte char: [" . bin2hex(mb_strcut("\xAFw", -1, 100, "GB18030")) . "]\n";
223+
224+
print "== UHC ==\n";
225+
226+
print "Single byte 0x96: [" . bin2hex(mb_strcut("\x96", 1, 1280, "UHC")) . "]\n";
227+
57228
?>
58229
--EXPECT--
59230
== EUC-JP ==
@@ -72,6 +243,15 @@ OK
72243
[]
73244
[e288ae]
74245
[e288ae 20]
246+
[c3a5 42 c3a4 43 c3b6 44 c3bc]
247+
== UTF-16 ==
248+
Single byte: []
249+
With from=1: []
250+
Bad surrogate: []
251+
Bad surrogate followed by other bytes: [003f1243]
252+
BE byte order mark: []
253+
LE byte order mark: []
254+
Length=0: []
75255
== UTF-16LE ==
76256
[]
77257
[]
@@ -80,3 +260,125 @@ OK
80260
[1a04]
81261
[1a04]
82262
[1a04 3804]
263+
Single byte: []
264+
== UTF-32BE ==
265+
[]
266+
[0000222e]
267+
[0000222e]
268+
[0000222e 00000020]
269+
[0000222e 00000020]
270+
== ISO-2022-JP ==
271+
[]
272+
[]
273+
[]
274+
[]
275+
[]
276+
[1b244234411b2842]
277+
[]
278+
[]
279+
[1b244234411b2842]
280+
[]
281+
[1b24423b7a1b2842 20]
282+
[1b24423b7a1b2842 20 61 62]
283+
[1b24423b7a1b2842 20 61 62 63 20]
284+
[20 61 62 63 20 1b2442252b1b2842]
285+
[1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
286+
[]
287+
Error followed by ASCII char: [4b]
288+
== ISO-2022-JP-2004 ==
289+
[]
290+
[]
291+
[]
292+
[]
293+
[]
294+
[]
295+
[1b24285134411b2842]
296+
[]
297+
[]
298+
[]
299+
[1b24285134411b2842]
300+
[]
301+
[1b24285134411b2842]
302+
[1b24285134411b2842 1b2428513b7a1b2842]
303+
[1b2428513b7a1b2842 20 61 62 63]
304+
[1b2428513b7a1b2842 20 61 62 63]
305+
[1b24285134411b2842 1b2428513b7a1b2842 20 61 62 63 20 1b242851252b1b2842 1b242851254a1b2842]
306+
[]
307+
== ISO-2022-JP-MS ==
308+
[]
309+
[]
310+
[]
311+
[]
312+
[]
313+
[1b244234411b2842]
314+
[1b244234411b2842]
315+
[]
316+
[]
317+
[1b244234411b2842]
318+
[1b244234411b2842]
319+
[]
320+
[1b24423b7a1b2842 20]
321+
[1b24423b7a1b2842 20 61 62]
322+
[1b24423b7a1b2842 20 61 62 63 20]
323+
[20 61 62 63 20 1b2442252b1b2842]
324+
[1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
325+
[]
326+
== JIS ==
327+
[]
328+
[]
329+
[]
330+
[]
331+
[]
332+
[1b244234411b2842]
333+
[]
334+
[]
335+
[1b244234411b2842]
336+
[]
337+
[1b24423b7a1b2842 20]
338+
[1b24423b7a1b2842 20 61 62]
339+
[1b24423b7a1b2842 20 61 62 63 20]
340+
[20 61 62 63 20 1b2442252b1b2842]
341+
[1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
342+
[]
343+
0xA3: []
344+
Bad escape sequence followed by null byte: []
345+
== ISO-2022-JP-KDDI ==
346+
[]
347+
[]
348+
[]
349+
[]
350+
[]
351+
[1b244234411b2842]
352+
[]
353+
[]
354+
[1b244234411b2842]
355+
[]
356+
[1b24423b7a1b2842 20]
357+
[1b24423b7a1b2842 20 61 62]
358+
[1b24423b7a1b2842 20 61 62 63 20]
359+
[20 61 62 63 20 1b2442252b1b2842]
360+
[1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
361+
[]
362+
== CP50220 ==
363+
Single byte 0xFF: []
364+
Double byte 0xFF: [3f]
365+
Sample string with multiple null bytes: [1b2442255e001b2842]
366+
Bad escape sequence preceded by bad bytes: [3f3f3f00]
367+
Good JISX 0208 sequence, but it won't fit in max number of bytes: []
368+
Bad escape sequence followed by GR kana: []
369+
== UTF-7 ==
370+
Single byte 0x01: []
371+
UTF-16 section ends abruptly: []
372+
UTF-16 section ends abruptly in middle of 2nd codepoint: [+Qxw-]
373+
Cutting in middle of UTF-16 section: []
374+
Cutting in middle of UTF-16 section (2): []
375+
== UTF7-IMAP ==
376+
Single byte 0x01: [?]
377+
UTF-16 section ends abruptly: []
378+
UTF-16 section ends abruptly in middle of 2nd codepoint: []
379+
UTF-16 section is terminated improperly: []
380+
== GB18030 ==
381+
Invalid byte 0xF5: []
382+
Double-byte char: []
383+
== UHC ==
384+
Single byte 0x96: [96]

0 commit comments

Comments (0)