@@ -16,22 +16,26 @@ function MBStringChars($string, $encoding) {
16
16
ini_set ('include_path ' , __DIR__ );
17
17
include_once ('common.inc ' );
18
18
19
- // EUC-JP
20
19
$ euc_jp = pack ('H* ' , '30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3 ' );
21
- // UTF-8
22
- $ utf8 = pack ('H* ' , 'e288ae2045e28b856461203d2051 ' ); // has 2 multi-byte characters: [e288ae 20 45 e28b85 64 61 20 3d 20 51]
23
- // UTF-16LE
20
+ $ utf8 = pack ('H* ' , 'e288ae2045e28b856461203d2051 ' ); // has 2 multi-byte characters: [e288ae 20 45 e28b85 64 61 20 3d 20 51]
24
21
$ utf16le = pack ('H* ' , '1a043804400438043b043b04380446043004200069007300200043007900720069006c006c0069006300 ' );
22
+ $ utf32be = mb_convert_encoding ($ utf8 , 'UTF-32BE ' , 'UTF-8 ' );
23
+ $ iso2022jp = mb_convert_encoding ("漢字 abc カナ " , 'ISO-2022-JP ' , 'UTF-8 ' ); // [<escape sequence>1b2442 3441 3b7a <escape sequence>1b2842 20 61 62 63 20 <escape sequence>1b2442 252b 254a <escape sequence>1b2842]
24
+ $ jis = mb_convert_encoding ("漢字 abc カナ " , 'JIS ' , 'UTF-8 ' );
25
+ // For testing ISO-2022-JP-2004, add a Kanji character which is in JISX 0213
26
+ $ iso2022jp2004 = mb_convert_encoding ("漢字 abc カナ凜 " , 'ISO-2022-JP-2004 ' , 'UTF-8 ' ); // [1b242851 3441 3b7a 1b2842 20 61 62 63 20 1b242851 252b 254a 7425 1b2842]
27
+ $ iso2022jpms = mb_convert_encoding ("漢字 abc カナ " , 'ISO-2022-JP-MS ' , 'UTF-8 ' ); // [1b2442 3441 3b7a 1b2842 20 61 62 63 20 1b2442 252b 254a 1b2842]
28
+ $ iso2022jp_kddi = mb_convert_encoding ("漢字 abc カナ " , 'ISO-2022-JP-KDDI ' , 'UTF-8 ' );
25
29
26
30
print "== EUC-JP == \n" ;
27
- print MBStringChars (mb_strcut ($ euc_jp , 6 , 5 ,'EUC-JP ' ), 'EUC-JP ' ) . "\n" ;
28
- print MBStringChars (mb_strcut ($ euc_jp , 5 , 5 ,'EUC-JP ' ), 'EUC-JP ' ) . "\n" ;
29
- print MBStringChars (mb_strcut ($ euc_jp , 0 , 100 ,'EUC-JP ' ), 'EUC-JP ' ) . "\n" ;
31
+ print MBStringChars (mb_strcut ($ euc_jp , 6 , 5 , 'EUC-JP ' ), 'EUC-JP ' ) . "\n" ;
32
+ print MBStringChars (mb_strcut ($ euc_jp , 5 , 5 , 'EUC-JP ' ), 'EUC-JP ' ) . "\n" ;
33
+ print MBStringChars (mb_strcut ($ euc_jp , 0 , 100 , 'EUC-JP ' ), 'EUC-JP ' ) . "\n" ;
30
34
31
- $ str = mb_strcut ($ euc_jp , 100 , 10 ,'EUC-JP ' );
35
+ $ str = mb_strcut ($ euc_jp , 100 , 10 , 'EUC-JP ' );
32
36
($ str === "" ) ? print "OK \n" : print "No good \n" ;
33
37
34
- $ str = mb_strcut ($ euc_jp , -100 , 10 ,'EUC-JP ' );
38
+ $ str = mb_strcut ($ euc_jp , -100 , 10 , 'EUC-JP ' );
35
39
($ str !== "" ) ? print "OK \n" : print "No good \n" ;
36
40
37
41
print "== UTF-8 == \n" ;
@@ -45,6 +49,17 @@ print MBStringChars(mb_strcut($utf8, 1, 2, 'UTF-8'), 'UTF-8') . "\n";
45
49
print MBStringChars (mb_strcut ($ utf8 , 1 , 3 , 'UTF-8 ' ), 'UTF-8 ' ) . "\n" ;
46
50
print MBStringChars (mb_strcut ($ utf8 , 1 , 4 , 'UTF-8 ' ), 'UTF-8 ' ) . "\n" ;
47
51
52
+ print MBStringChars (mb_strcut ('AåBäCöDü ' , 2 , 100 , 'UTF-8 ' ), 'UTF-8 ' ) . "\n" ;
53
+
54
+ print "== UTF-16 == \n" ;
55
+ print "Single byte: [ " . bin2hex (mb_strcut ("\xFF" , 0 , 100 , 'UTF-16 ' )) . "] \n" ;
56
+ print "With from=1: [ " . bin2hex (mb_strcut ("\xff\x01" , 1 , 100 , "UTF-16 " )) . "] \n" ;
57
+ print "Bad surrogate: [ " . bin2hex (mb_strcut ("\xD9\xFF" , 0 , 100 , "UTF-16 " )) . "] \n" ;
58
+ print "Bad surrogate followed by other bytes: [ " . bin2hex (mb_strcut ("\xd9\x00\x12C " , 0 , 100 , "UTF-16 " )) . "] \n" ;
59
+ print "BE byte order mark: [ " . bin2hex (mb_strcut ("\xFE\xFF" , 0 , 100 , "UTF-16 " )) . "] \n" ;
60
+ print "LE byte order mark: [ " . bin2hex (mb_strcut ("\xFF\xFE" , 0 , 100 , "UTF-16 " )) . "] \n" ;
61
+ print "Length=0: [ " . bin2hex (mb_strcut ("\x00\x01\x00\x00" , 1 , -512 , "UTF-16 " )) . "] \n" ;
62
+
48
63
print "== UTF-16LE == \n" ;
49
64
print MBStringChars (mb_strcut ($ utf16le , 0 , 0 , 'UTF-16LE ' ), 'UTF-16LE ' ) . "\n" ;
50
65
print MBStringChars (mb_strcut ($ utf16le , 0 , 1 , 'UTF-16LE ' ), 'UTF-16LE ' ) . "\n" ;
@@ -54,6 +69,162 @@ print MBStringChars(mb_strcut($utf16le, 1, 2, 'UTF-16LE'), 'UTF-16LE') . "\n";
54
69
print MBStringChars (mb_strcut ($ utf16le , 1 , 3 , 'UTF-16LE ' ), 'UTF-16LE ' ) . "\n" ;
55
70
print MBStringChars (mb_strcut ($ utf16le , 1 , 4 , 'UTF-16LE ' ), 'UTF-16LE ' ) . "\n" ;
56
71
72
+ print "Single byte: [ " . bin2hex (mb_strcut ("\xFF" , 0 , 100 , 'UTF-16LE ' )) . "] \n" ;
73
+
74
+ print "== UTF-32BE == \n" ;
75
+ print MBStringChars (mb_strcut ($ utf32be , 0 , 3 , 'UTF-32BE ' ), 'UTF-32BE ' ) . "\n" ;
76
+ print MBStringChars (mb_strcut ($ utf32be , 0 , 4 , 'UTF-32BE ' ), 'UTF-32BE ' ) . "\n" ;
77
+ print MBStringChars (mb_strcut ($ utf32be , 0 , 5 , 'UTF-32BE ' ), 'UTF-32BE ' ) . "\n" ;
78
+ print MBStringChars (mb_strcut ($ utf32be , 1 , 8 , 'UTF-32BE ' ), 'UTF-32BE ' ) . "\n" ;
79
+ print MBStringChars (mb_strcut ($ utf32be , 3 , 9 , 'UTF-32BE ' ), 'UTF-32BE ' ) . "\n" ;
80
+
81
+ print "== ISO-2022-JP == \n" ;
82
+ print MBStringChars (mb_strcut ($ iso2022jp , 0 , 3 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
83
+ print MBStringChars (mb_strcut ($ iso2022jp , 0 , 4 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
84
+ print MBStringChars (mb_strcut ($ iso2022jp , 0 , 5 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
85
+ print MBStringChars (mb_strcut ($ iso2022jp , 0 , 6 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
86
+ print MBStringChars (mb_strcut ($ iso2022jp , 0 , 7 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
87
+ print MBStringChars (mb_strcut ($ iso2022jp , 0 , 8 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
88
+
89
+ print MBStringChars (mb_strcut ($ iso2022jp , 1 , 3 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
90
+ print MBStringChars (mb_strcut ($ iso2022jp , 1 , 6 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
91
+ print MBStringChars (mb_strcut ($ iso2022jp , 1 , 8 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
92
+
93
+ print MBStringChars (mb_strcut ($ iso2022jp , 2 , 5 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
94
+ print MBStringChars (mb_strcut ($ iso2022jp , 5 , 9 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
95
+ print MBStringChars (mb_strcut ($ iso2022jp , 5 , 11 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
96
+ print MBStringChars (mb_strcut ($ iso2022jp , 6 , 13 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
97
+ print MBStringChars (mb_strcut ($ iso2022jp , 7 , 13 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
98
+
99
+ print MBStringChars (mb_strcut ($ iso2022jp , 1 , 100 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
100
+ print MBStringChars (mb_strcut ($ iso2022jp , 50 , 100 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
101
+
102
+ print "Error followed by ASCII char: [ " . bin2hex (mb_strcut ("\xdaK " , 0 , 100 , "ISO-2022-JP " )) . "] \n" ;
103
+
104
+ print "== ISO-2022-JP-2004 == \n" ;
105
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 0 , 3 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
106
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 0 , 4 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
107
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 0 , 5 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
108
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 0 , 6 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
109
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 0 , 7 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
110
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 0 , 8 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
111
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 0 , 9 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
112
+
113
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 1 , 3 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
114
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 1 , 6 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
115
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 1 , 8 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
116
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 1 , 9 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
117
+
118
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 2 , 5 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
119
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 5 , 9 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
120
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 5 , 11 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
121
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 6 , 13 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
122
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 7 , 13 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
123
+
124
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 1 , 100 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
125
+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 50 , 100 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
126
+
127
+ print "== ISO-2022-JP-MS == \n" ;
128
+ print MBStringChars (mb_strcut ($ iso2022jpms , 0 , 3 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
129
+ print MBStringChars (mb_strcut ($ iso2022jpms , 0 , 4 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
130
+ print MBStringChars (mb_strcut ($ iso2022jpms , 0 , 5 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
131
+ print MBStringChars (mb_strcut ($ iso2022jpms , 0 , 6 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
132
+ print MBStringChars (mb_strcut ($ iso2022jpms , 0 , 7 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
133
+ print MBStringChars (mb_strcut ($ iso2022jpms , 0 , 8 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
134
+ print MBStringChars (mb_strcut ($ iso2022jpms , 0 , 9 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
135
+
136
+ print MBStringChars (mb_strcut ($ iso2022jpms , 1 , 3 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
137
+ print MBStringChars (mb_strcut ($ iso2022jpms , 1 , 6 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
138
+ print MBStringChars (mb_strcut ($ iso2022jpms , 1 , 8 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
139
+ print MBStringChars (mb_strcut ($ iso2022jpms , 1 , 9 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
140
+
141
+ print MBStringChars (mb_strcut ($ iso2022jpms , 2 , 5 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
142
+ print MBStringChars (mb_strcut ($ iso2022jpms , 5 , 9 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
143
+ print MBStringChars (mb_strcut ($ iso2022jpms , 5 , 11 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
144
+ print MBStringChars (mb_strcut ($ iso2022jpms , 6 , 13 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
145
+ print MBStringChars (mb_strcut ($ iso2022jpms , 7 , 13 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
146
+
147
+ print MBStringChars (mb_strcut ($ iso2022jpms , 1 , 100 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
148
+ print MBStringChars (mb_strcut ($ iso2022jpms , 50 , 100 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
149
+
150
+ print "== JIS == \n" ;
151
+ print MBStringChars (mb_strcut ($ jis , 0 , 3 , 'JIS ' ), 'JIS ' ) . "\n" ;
152
+ print MBStringChars (mb_strcut ($ jis , 0 , 4 , 'JIS ' ), 'JIS ' ) . "\n" ;
153
+ print MBStringChars (mb_strcut ($ jis , 0 , 5 , 'JIS ' ), 'JIS ' ) . "\n" ;
154
+ print MBStringChars (mb_strcut ($ jis , 0 , 6 , 'JIS ' ), 'JIS ' ) . "\n" ;
155
+ print MBStringChars (mb_strcut ($ jis , 0 , 7 , 'JIS ' ), 'JIS ' ) . "\n" ;
156
+ print MBStringChars (mb_strcut ($ jis , 0 , 8 , 'JIS ' ), 'JIS ' ) . "\n" ;
157
+
158
+ print MBStringChars (mb_strcut ($ jis , 1 , 3 , 'JIS ' ), 'JIS ' ) . "\n" ;
159
+ print MBStringChars (mb_strcut ($ jis , 1 , 6 , 'JIS ' ), 'JIS ' ) . "\n" ;
160
+ print MBStringChars (mb_strcut ($ jis , 1 , 8 , 'JIS ' ), 'JIS ' ) . "\n" ;
161
+
162
+ print MBStringChars (mb_strcut ($ jis , 2 , 5 , 'JIS ' ), 'JIS ' ) . "\n" ;
163
+ print MBStringChars (mb_strcut ($ jis , 5 , 9 , 'JIS ' ), 'JIS ' ) . "\n" ;
164
+ print MBStringChars (mb_strcut ($ jis , 5 , 11 , 'JIS ' ), 'JIS ' ) . "\n" ;
165
+ print MBStringChars (mb_strcut ($ jis , 6 , 13 , 'JIS ' ), 'JIS ' ) . "\n" ;
166
+ print MBStringChars (mb_strcut ($ jis , 7 , 13 , 'JIS ' ), 'JIS ' ) . "\n" ;
167
+
168
+ print MBStringChars (mb_strcut ($ jis , 1 , 100 , 'JIS ' ), 'JIS ' ) . "\n" ;
169
+ print MBStringChars (mb_strcut ($ jis , 50 , 100 , 'JIS ' ), 'JIS ' ) . "\n" ;
170
+
171
+ print "0xA3: [ " . bin2hex (mb_strcut ("\xA3aaaaaa " , 0 , 100 , 'JIS ' )) . "] \n" ;
172
+ print "Bad escape sequence followed by null byte: [ " . bin2hex (mb_strcut ("\x1b\x00" , 1 , 100 , "JIS " )) . "] \n" ;
173
+
174
+ print "== ISO-2022-JP-KDDI == \n" ;
175
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 0 , 3 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
176
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 0 , 4 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
177
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 0 , 5 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
178
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 0 , 6 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
179
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 0 , 7 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
180
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 0 , 8 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
181
+
182
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 1 , 3 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
183
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 1 , 6 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
184
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 1 , 8 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
185
+
186
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 2 , 5 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
187
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 5 , 9 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
188
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 5 , 11 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
189
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 6 , 13 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
190
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 7 , 13 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
191
+
192
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 1 , 100 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
193
+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 50 , 100 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
194
+
195
+ print "== CP50220 == \n" ;
196
+
197
+ print "Single byte 0xFF: [ " . bin2hex (mb_strcut ("\xFF" , 0 , 100 , 'CP50220 ' )) . "] \n" ;
198
+ print "Double byte 0xFF: [ " . bin2hex (mb_strcut ("\xFF\xFF" , 0 , 100 , 'CP50220 ' )) . "] \n" ;
199
+ print "Sample string with multiple null bytes: [ " . bin2hex (mb_strcut ("\xCF\x00\x00\x00\x00\x00d \x00\x00" , 0 , 100 , 'CP50220 ' )) . "] \n" ;
200
+ print "Bad escape sequence preceded by bad bytes: [ " . bin2hex (mb_strcut ("\xFF\xFF\x1B\x00" , 0 , 100 , 'CP50220 ' )) . "] \n" ;
201
+ print "Good JISX 0208 sequence, but it won't fit in max number of bytes: [ " . bin2hex (mb_strcut ("\x1B\$BGV \x17" , 0 , 100 , 'CP50220 ' )) . "] \n" ;
202
+ print "Bad escape sequence followed by GR kana: [ " . bin2hex (mb_strcut ("\x1B\$\xAC\x13" , 0 , 100 , 'CP50220 ' )) . "] \n" ;
203
+
204
+ print "== UTF-7 == \n" ;
205
+
206
+ print "Single byte 0x01: [ " . mb_strcut ("\x01" , 0 , 100 , 'UTF-7 ' ) . "] \n" ;
207
+ print "UTF-16 section ends abruptly: [ " . mb_strcut ("+Q " , 1 , 100 , 'UTF-7 ' ) . "] \n" ;
208
+ print "UTF-16 section ends abruptly in middle of 2nd codepoint: [ " . mb_strcut ("+QxxC " , 0 , 100 , 'UTF-7 ' ) . "] \n" ;
209
+ print "Cutting in middle of UTF-16 section: [ " . mb_strcut ("+UUU " , -1 , 255 , "UTF-7 " ) . "] \n" ;
210
+ print "Cutting in middle of UTF-16 section (2): [ " . mb_strcut ("+UUUU " , -2 , 255 , "UTF-7 " ) . "] \n" ;
211
+
212
+ print "== UTF7-IMAP == \n" ;
213
+
214
+ print "Single byte 0x01: [ " . mb_strcut ("\x01" , 0 , 100 , 'UTF7-IMAP ' ) . "] \n" ;
215
+ print "UTF-16 section ends abruptly: [ " . mb_strcut ("&Q " , 1 , 100 , 'UTF7-IMAP ' ) . "] \n" ;
216
+ print "UTF-16 section ends abruptly in middle of 2nd codepoint: [ " . mb_strcut ("&QxxC " , 0 , 100 , 'UTF7-IMAP ' ) . "] \n" ;
217
+ print "UTF-16 section is terminated improperly: [ " . mb_strcut ("&i6o \x83" , 0 , 100 , 'UTF7-IMAP ' ) . "] \n" ;
218
+
219
+ print "== GB18030 == \n" ;
220
+
221
+ print "Invalid byte 0xF5: [ " . bin2hex (mb_strcut ("\xF5a " , 1 , 100 , 'GB18030 ' )) . "] \n" ;
222
+ print "Double-byte char: [ " . bin2hex (mb_strcut ("\xAFw " , -1 , 100 , "GB18030 " )) . "] \n" ;
223
+
224
+ print "== UHC == \n" ;
225
+
226
+ print "Single byte 0x96: [ " . bin2hex (mb_strcut ("\x96" , 1 , 1280 , "UHC " )) . "] \n" ;
227
+
57
228
?>
58
229
--EXPECT--
59
230
== EUC-JP ==
72
243
[]
73
244
[e288ae]
74
245
[e288ae 20]
246
+ [c3a5 42 c3a4 43 c3b6 44 c3bc]
247
+ == UTF-16 ==
248
+ Single byte: []
249
+ With from=1: []
250
+ Bad surrogate: []
251
+ Bad surrogate followed by other bytes: [003f1243]
252
+ BE byte order mark: []
253
+ LE byte order mark: []
254
+ Length=0: []
75
255
== UTF-16LE ==
76
256
[]
77
257
[]
80
260
[1a04]
81
261
[1a04]
82
262
[1a04 3804]
263
+ Single byte: []
264
+ == UTF-32BE ==
265
+ []
266
+ [0000222e]
267
+ [0000222e]
268
+ [0000222e 00000020]
269
+ [0000222e 00000020]
270
+ == ISO-2022-JP ==
271
+ []
272
+ []
273
+ []
274
+ []
275
+ []
276
+ [1b244234411b2842]
277
+ []
278
+ []
279
+ [1b244234411b2842]
280
+ []
281
+ [1b24423b7a1b2842 20]
282
+ [1b24423b7a1b2842 20 61 62]
283
+ [1b24423b7a1b2842 20 61 62 63 20]
284
+ [20 61 62 63 20 1b2442252b1b2842]
285
+ [1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
286
+ []
287
+ Error followed by ASCII char: [4b]
288
+ == ISO-2022-JP-2004 ==
289
+ []
290
+ []
291
+ []
292
+ []
293
+ []
294
+ []
295
+ [1b24285134411b2842]
296
+ []
297
+ []
298
+ []
299
+ [1b24285134411b2842]
300
+ []
301
+ [1b24285134411b2842]
302
+ [1b24285134411b2842 1b2428513b7a1b2842]
303
+ [1b2428513b7a1b2842 20 61 62 63]
304
+ [1b2428513b7a1b2842 20 61 62 63]
305
+ [1b24285134411b2842 1b2428513b7a1b2842 20 61 62 63 20 1b242851252b1b2842 1b242851254a1b2842]
306
+ []
307
+ == ISO-2022-JP-MS ==
308
+ []
309
+ []
310
+ []
311
+ []
312
+ []
313
+ [1b244234411b2842]
314
+ [1b244234411b2842]
315
+ []
316
+ []
317
+ [1b244234411b2842]
318
+ [1b244234411b2842]
319
+ []
320
+ [1b24423b7a1b2842 20]
321
+ [1b24423b7a1b2842 20 61 62]
322
+ [1b24423b7a1b2842 20 61 62 63 20]
323
+ [20 61 62 63 20 1b2442252b1b2842]
324
+ [1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
325
+ []
326
+ == JIS ==
327
+ []
328
+ []
329
+ []
330
+ []
331
+ []
332
+ [1b244234411b2842]
333
+ []
334
+ []
335
+ [1b244234411b2842]
336
+ []
337
+ [1b24423b7a1b2842 20]
338
+ [1b24423b7a1b2842 20 61 62]
339
+ [1b24423b7a1b2842 20 61 62 63 20]
340
+ [20 61 62 63 20 1b2442252b1b2842]
341
+ [1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
342
+ []
343
+ 0xA3: []
344
+ Bad escape sequence followed by null byte: []
345
+ == ISO-2022-JP-KDDI ==
346
+ []
347
+ []
348
+ []
349
+ []
350
+ []
351
+ [1b244234411b2842]
352
+ []
353
+ []
354
+ [1b244234411b2842]
355
+ []
356
+ [1b24423b7a1b2842 20]
357
+ [1b24423b7a1b2842 20 61 62]
358
+ [1b24423b7a1b2842 20 61 62 63 20]
359
+ [20 61 62 63 20 1b2442252b1b2842]
360
+ [1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
361
+ []
362
+ == CP50220 ==
363
+ Single byte 0xFF: []
364
+ Double byte 0xFF: [3f]
365
+ Sample string with multiple null bytes: [1b2442255e001b2842]
366
+ Bad escape sequence preceded by bad bytes: [3f3f3f00]
367
+ Good JISX 0208 sequence, but it won't fit in max number of bytes: []
368
+ Bad escape sequence followed by GR kana: []
369
+ == UTF-7 ==
370
+ Single byte 0x01: []
371
+ UTF-16 section ends abruptly: []
372
+ UTF-16 section ends abruptly in middle of 2nd codepoint: [+Qxw-]
373
+ Cutting in middle of UTF-16 section: []
374
+ Cutting in middle of UTF-16 section (2): []
375
+ == UTF7-IMAP ==
376
+ Single byte 0x01: [?]
377
+ UTF-16 section ends abruptly: []
378
+ UTF-16 section ends abruptly in middle of 2nd codepoint: []
379
+ UTF-16 section is terminated improperly: []
380
+ == GB18030 ==
381
+ Invalid byte 0xF5: []
382
+ Double-byte char: []
383
+ == UHC ==
384
+ Single byte 0x96: [96]
0 commit comments