@@ -41,19 +41,19 @@ echo "2: " . mb_decode_numericentity($str2, $convmap, "UTF-8") . "\n";
41
41
echo "3: " . mb_decode_numericentity ($ str3 , $ convmap , "UTF-8 " ) . "\n" ;
42
42
43
43
// Numeric entities which are truncated at end of string
44
- // We do NOT decode such entities; they can be terminated by any non-digit character, but not by the end of the string
45
- echo "4: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
46
- echo "5: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
47
- echo "6: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
48
- echo "7: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
49
- echo "8: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
50
- echo "9: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
51
- echo "10: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
52
- echo "11: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
44
+ echo "4: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ; // Entity is too big
45
+ echo "5: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ; // Entity is too big
46
+ echo "6: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ; // Too many digits
47
+ echo "7: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ; // Too many digits
48
+ echo "8: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ; // Too many digits
49
+ echo "9: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ; // Too many digits
50
+ echo "10: " . bin2hex (mb_decode_numericentity ('� ' , $ convmap )), "\n" ; // OK
51
+ echo "11: " . bin2hex (mb_decode_numericentity ('� ' , $ convmap )), "\n" ; // OK
53
52
// Try with hex, not just decimal entities
54
- echo "11b: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
55
- echo "11c: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ;
56
- echo "11d: " . mb_decode_numericentity ('𐀀 ' , $ convmap ), "\n" ;
53
+ echo "11b: " . bin2hex (mb_decode_numericentity ('� ' , $ convmap )), "\n" ; // OK
54
+ echo "11c: " . bin2hex (mb_decode_numericentity ('� ' , $ convmap )), "\n" ; // OK
55
+ echo "11d: " . bin2hex (mb_decode_numericentity ('𐀀 ' , $ convmap )), "\n" ; // OK
56
+ echo "11e: " . mb_decode_numericentity ('� ' , $ convmap ), "\n" ; // Too many digits
57
57
58
58
// Large decimal entity, converting from non-ASCII input encoding
59
59
echo "12: " . bin2hex (mb_decode_numericentity (mb_convert_encoding ('� ' , 'UCS-4 ' , 'ASCII ' ), [0 , 0x7FFFFFFF , 0 , 0x7FFFFFFF ], 'UCS-4 ' )), "\n" ;
@@ -100,6 +100,8 @@ test("Successive &", "&A,", "&A,", [0, 0xFFFF, 0, 0xFFFF], 'ASCII');
100
100
test ("Successive &# " , "&#2 " , " " , [0 , 0xFFFF , 0 , 0xFFFF ], 'ASCII ' );
101
101
test ("Successive &#x " , "&#x2 " , " " , [0 , 0xFFFF , 0 , 0xFFFF ], 'ASCII ' );
102
102
103
+ test ("&#x only " , "&#x; " , "&#x; " , [0 , 0xFFFF , 0 , 0xFFFF ], 'ASCII ' );
104
+
103
105
// The starting & of an entity can terminate a preceding entity
104
106
test ("Successive A " , "AA " , "AA " , [0 , 0xFFFF , 0 , 0xFFFF ], 'ASCII ' );
105
107
test ("Successive hex entities " , "22 " , "22 " , [0 , 0xFFFF , 0 , 0xFFFF ], 'ASCII ' );
@@ -131,6 +133,36 @@ test("Regression test (truncation of successive & with JIS encoding)", "&&&", "&
131
133
// Previously, signed arithmetic was used on convmap entries
132
134
test ("Regression test (convmap entries are now treated as unsigned) " , ", " , "?, " , [0x22FFFF11 , 0xBF111189 , 0x67726511 , 0x1161E719 ], "ASCII " );
133
135
136
+ // Try with '&', '&#', or '&#x' at the end of a buffer of wchars, with more input
137
+ // still left to process in the next buffer
138
+ // (mb_decode_numericentity splits its input into 'chunks' and processes it one
139
+ // chunk at a time)
140
+ $ convmap = [0 , 0xFFFF , 0 , 0xFFFF ];
141
+ for ($ i = 0 ; $ i < 256 ; $ i ++) {
142
+ $ padding = str_repeat ("a " , $ i );
143
+ // First try invalid decimal/hex entities
144
+ if (mb_decode_numericentity ($ padding . "&#ZZZ " , $ convmap , 'UTF-8 ' ) !== $ padding . "&#ZZZ " )
145
+ die ("&#ZZZ is broken when it spans two buffers! " );
146
+ if (mb_decode_numericentity ($ padding . "&#xZZZ " , $ convmap , 'UTF-8 ' ) !== $ padding . "&#xZZZ " )
147
+ die ("&#xZZZ is broken when it spans two buffers! " );
148
+ // Now try valid decimal/hex entities
149
+ if (mb_decode_numericentity ($ padding . "A " , $ convmap , 'UTF-8 ' ) !== $ padding . "A " )
150
+ die ("A is broken when it spans two buffers! " );
151
+ if (mb_decode_numericentity ($ padding . "A " , $ convmap , 'UTF-8 ' ) !== $ padding . "A " )
152
+ die ("A is broken when it spans two buffers! " );
153
+ }
154
+
155
+ // Try huge entities, big enough to fill an entire buffer
156
+ for ($ i = 12 ; $ i < 256 ; $ i ++) {
157
+ $ str = "&# " . str_repeat ("0 " , $ i ) . "65 " ;
158
+ if (mb_decode_numericentity ($ str , $ convmap , 'UTF-8 ' ) !== $ str )
159
+ die ("Decimal entity with huge number of digits broken " );
160
+
161
+ $ str = "&#x " . str_repeat ("0 " , $ i ) . "41 " ;
162
+ if (mb_decode_numericentity ($ str , $ convmap , 'UTF-8 ' ) !== $ str )
163
+ die ("Hexadecimal entity with huge number of digits broken " );
164
+ }
165
+
134
166
?>
135
167
--EXPECT--
136
168
1: ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
@@ -142,11 +174,12 @@ test("Regression test (convmap entries are now treated as unsigned)", ",", "?
142
174
7: �
143
175
8: �
144
176
9: �
145
- 10: �
146
- 11: �
147
- 11b: �
148
- 11c: �
149
- 11d: 𐀀
177
+ 10: 00
178
+ 11: 00
179
+ 11b: 00
180
+ 11c: 00
181
+ 11d: f0908080
182
+ 11e: �
150
183
12: 00bc614e
151
184
13: föo
152
185
14: mb_decode_numericentity(): Argument #2 ($map) must have a multiple of 4 elements
@@ -164,6 +197,7 @@ Single &: string(1) "&" => string(1) "&" (Good)
164
197
Successive &: string(6) "&A," => string(3) "&A," (Good)
165
198
Successive &#: string(8) "&#2" => string(3) "" (Good)
166
199
Successive &#x: string(9) "&#x2" => string(4) "" (Good)
200
+ &#x only: string(4) "&#x;" => string(4) "&#x;" (Good)
167
201
Successive A: string(9) "AA" => string(2) "AA" (Good)
168
202
Successive hex entities: string(11) "22" => string(2) "22" (Good)
169
203
Starting entity immediately after decimal entity which is too long: string(18) "�A" => string(14) "�A" (Good)
0 commit comments