@@ -11,7 +11,21 @@ ini_set('include_path','.');
include_once('common.inc');

// Test fixtures. Most are written as UTF-8 literals and converted to the
// target encoding at run time; ISO-2022-JP and HZ are spelled out directly
// as their 7-bit byte sequences.

// EUC-JP
$euc_jp = mb_convert_encoding('0123この文字列は日本語です。EUC-JPを使っています。日本語は面倒臭い。', 'EUC-JP', 'UTF-8');
// SJIS: ASCII digits 0-4 are followed by FULL-WIDTH digits 5-9. The expected
// output relies on the full-width forms (bytes 8254..8258), so they must not
// be normalized to ASCII.
$sjis = mb_convert_encoding('日本語テキストです。01234５６７８９。', 'SJIS', 'UTF-8');
// ISO-2022-JP: ESC $ B, two double-byte characters (0x2121, 0x2172),
// ESC ( B, then ASCII "ABC" -- five characters in total.
$iso2022jp = "\x1B\$B\x21\x21!r\x1B(BABC";
// GB-18030
$gb18030 = mb_convert_encoding('密码用户名密码名称名称', 'GB18030', 'UTF-8');
// HZ: the span between ~{ and ~} holds double-byte GB characters.
$hz = "The next sentence is in GB.~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.";
// UTF-8
$utf8 = "Greek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь";
// UTF-32 (same text as $utf8)
$utf32 = mb_convert_encoding($utf8, 'UTF-32', 'UTF-8');
// UTF-7 (same text as $utf8)
$utf7 = mb_convert_encoding($utf8, 'UTF-7', 'UTF-8');

// EUC-JP 1: ten characters starting at character offset 10.
print "1: " . bin2hex(mb_substr($euc_jp, 10, 10, 'EUC-JP')) . "\n";
// EUC-JP 2: a length longer than the string returns the whole string.
print "2: " . bin2hex(mb_substr($euc_jp, 0, 100, 'EUC-JP')) . "\n";
@@ -20,12 +34,148 @@ $str = mb_substr($euc_jp, 100, 10,'EUC-JP');
// EUC-JP 3: $str comes from mb_substr($euc_jp, 100, 10, 'EUC-JP'). Offset 100
// lies past the end of the string, so the result must be the empty string.
print(($str === "") ? "3 OK\n" : "NG: " . bin2hex($str) . "\n");

// EUC-JP 4: a negative start that reaches back before the first character is
// clamped to the beginning, so the first ten characters are returned.
$str = mb_substr($euc_jp, -100, 10, 'EUC-JP');
// The whole ternary is wrapped in parentheses so it is obvious that print
// receives the selected message rather than just the comparison.
print(($str !== "") ? "4 OK: " . bin2hex($str) . "\n" : "BAD: " . bin2hex($str) . "\n");
echo "SJIS:\n";
// 1: first three characters.
print "1: " . bin2hex(mb_substr($sjis, 0, 3, 'SJIS')) . "\n";
// 2: last character (negative start, null length = up to the end).
print "2: " . bin2hex(mb_substr($sjis, -1, null, 'SJIS')) . "\n";
// 3: three characters starting five characters before the end.
print "3: " . bin2hex(mb_substr($sjis, -5, 3, 'SJIS')) . "\n";
// 4: everything after the first character.
print "4: " . bin2hex(mb_substr($sjis, 1, null, 'SJIS')) . "\n";
// 5: zero length yields an empty result.
print "5: " . bin2hex(mb_substr($sjis, 10, 0, 'SJIS')) . "\n";
// Six bytes: 0x80, 'a', 'b', 'c', 0x80, 0xA1. The expected output shows that
// each 0x80 byte is counted as one character by every SJIS flavour below.
$illegal = "\x80abc\x80\xA1";

echo "-- Testing illegal SJIS byte 0x80 --\n";
// Two characters from offset 3: 'c' followed by the second 0x80.
print bin2hex(mb_substr($illegal, 3, 2, 'SJIS')) . "\n";
// Three characters from offset 0: the first 0x80, 'a', 'b'.
print bin2hex(mb_substr($illegal, 0, 3, 'SJIS')) . "\n";

// The same two extractions, repeated for each SJIS variant under its own
// header line.
$variants = [
    'SJIS-2004',
    'MacJapanese',
    'SJIS-Mobile#DOCOMO',
    'SJIS-Mobile#KDDI',
    'SJIS-Mobile#SoftBank',
];
foreach ($variants as $variant) {
    echo $variant . ":\n";
    print bin2hex(mb_substr($illegal, 3, 2, $variant)) . "\n";
    print bin2hex(mb_substr($illegal, 0, 3, $variant)) . "\n";
}
echo "ISO-2022-JP:\n";
// $iso2022jp holds five characters. Each non-empty result is emitted with its
// own escape sequences, even when the slice starts mid-way through the
// double-byte run (case 5).
// 1: first three characters.
print "1: " . bin2hex(mb_substr($iso2022jp, 0, 3, 'ISO-2022-JP')) . "\n";
// 2: last character.
print "2: " . bin2hex(mb_substr($iso2022jp, -1, null, 'ISO-2022-JP')) . "\n";
// 3: start -6 reaches before the first character, so it behaves like case 1.
print "3: " . bin2hex(mb_substr($iso2022jp, -6, 3, 'ISO-2022-JP')) . "\n";
// 4: two characters starting three before the end ("AB").
print "4: " . bin2hex(mb_substr($iso2022jp, -3, 2, 'ISO-2022-JP')) . "\n";
// 5: everything after the first character.
print "5: " . bin2hex(mb_substr($iso2022jp, 1, null, 'ISO-2022-JP')) . "\n";
// 6: zero length yields an empty result.
print "6: " . bin2hex(mb_substr($iso2022jp, 10, 0, 'ISO-2022-JP')) . "\n";
// 7: start past the end of the string yields an empty result.
print "7: " . bin2hex(mb_substr($iso2022jp, 100, 10, 'ISO-2022-JP')) . "\n";

echo "GB-18030:\n";
// 1: first three characters.
print "1: " . bin2hex(mb_substr($gb18030, 0, 3, 'GB-18030')) . "\n";
// 2: last character.
print "2: " . bin2hex(mb_substr($gb18030, -1, null, 'GB-18030')) . "\n";
// 3: three characters starting five before the end.
print "3: " . bin2hex(mb_substr($gb18030, -5, 3, 'GB-18030')) . "\n";
// 4: everything after the first character.
print "4: " . bin2hex(mb_substr($gb18030, 1, null, 'GB-18030')) . "\n";
// 5: zero length yields an empty result.
print "5: " . bin2hex(mb_substr($gb18030, 10, 0, 'GB-18030')) . "\n";
// Every group below runs the same five cases:
//   1: first three characters
//   2: last character (negative start, null length = up to the end)
//   3: three characters starting five before the end
//   4: everything after the first character
//   5: zero length, which yields an empty result

echo "HZ:\n";
// HZ is 7-bit, so results are printed as-is rather than hex-encoded. Case 3
// straddles the end of the GB run and comes back wrapped in its own ~{ ~}.
print "1: " . mb_substr($hz, 0, 3, 'HZ') . "\n";
print "2: " . mb_substr($hz, -1, null, 'HZ') . "\n";
print "3: " . mb_substr($hz, -5, 3, 'HZ') . "\n";
print "4: " . mb_substr($hz, 1, null, 'HZ') . "\n";
print "5: " . mb_substr($hz, 10, 0, 'HZ') . "\n";

echo "UTF-8:\n";
print "1: " . mb_substr($utf8, 0, 3, 'UTF-8') . "\n";
print "2: " . mb_substr($utf8, -1, null, 'UTF-8') . "\n";
print "3: " . mb_substr($utf8, -5, 3, 'UTF-8') . "\n";
print "4: " . mb_substr($utf8, 1, null, 'UTF-8') . "\n";
print "5: " . mb_substr($utf8, 10, 0, 'UTF-8') . "\n";

echo "UTF-32:\n";
// Results are converted back to UTF-8 so the expected output stays readable.
print "1: " . mb_convert_encoding(mb_substr($utf32, 0, 3, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
print "2: " . mb_convert_encoding(mb_substr($utf32, -1, null, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
print "3: " . mb_convert_encoding(mb_substr($utf32, -5, 3, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
print "4: " . mb_convert_encoding(mb_substr($utf32, 1, null, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
print "5: " . mb_convert_encoding(mb_substr($utf32, 10, 0, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";

echo "UTF-7:\n";
// Results are converted back to UTF-8 so the expected output stays readable.
print "1: " . mb_convert_encoding(mb_substr($utf7, 0, 3, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
print "2: " . mb_convert_encoding(mb_substr($utf7, -1, null, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
print "3: " . mb_convert_encoding(mb_substr($utf7, -5, 3, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
print "4: " . mb_convert_encoding(mb_substr($utf7, 1, null, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
print "5: " . mb_convert_encoding(mb_substr($utf7, 10, 0, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";

?>
--EXPECT--
28
116
1: c6fccbdcb8eca4c7a4b9a1a34555432d
29
117
2: 30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3
30
118
3 OK
31
119
4 OK: 30313233a4b3a4cecab8bbfacef3a4cf
120
+ SJIS:
121
+ 1: 93fa967b8cea
122
+ 2: 8142
123
+ 3: 825582568257
124
+ 4: 967b8cea8365834c8358836782c582b781423031323334825482558256825782588142
125
+ 5:
126
+ -- Testing illegal SJIS byte 0x80 --
127
+ 6380
128
+ 806162
129
+ SJIS-2004:
130
+ 6380
131
+ 806162
132
+ MacJapanese:
133
+ 6380
134
+ 806162
135
+ SJIS-Mobile#DOCOMO:
136
+ 6380
137
+ 806162
138
+ SJIS-Mobile#KDDI:
139
+ 6380
140
+ 806162
141
+ SJIS-Mobile#SoftBank:
142
+ 6380
143
+ 806162
144
+ ISO-2022-JP:
145
+ 1: 1b2442212121721b284241
146
+ 2: 43
147
+ 3: 1b2442212121721b284241
148
+ 4: 4142
149
+ 5: 1b244221721b2842414243
150
+ 6:
151
+ 7:
152
+ GB-18030:
153
+ 1: c3dcc2ebd3c3
154
+ 2: b3c6
155
+ 3: c2ebc3fbb3c6
156
+ 4: c2ebd3c3bba7c3fbc3dcc2ebc3fbb3c6c3fbb3c6
157
+ 5:
158
+ HZ:
159
+ 1: The
160
+ 2: .
161
+ 3: ~{!#~}By
162
+ 4: he next sentence is in GB.~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.
163
+ 5:
164
+ UTF-8:
165
+ 1: Gre
166
+ 2: ь
167
+ 3: йте
168
+ 4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
169
+ 5:
170
+ UTF-32:
171
+ 1: Gre
172
+ 2: ь
173
+ 3: йте
174
+ 4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
175
+ 5:
176
+ UTF-7:
177
+ 1: Gre
178
+ 2: ь
179
+ 3: йте
180
+ 4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
181
+ 5:
0 commit comments