@@ -20,13 +20,37 @@ for ($i = 0; $i < 0x20; $i++) {
20
20
$ fromUnicode ["\x00" . chr ($ i )] = chr ($ i );
21
21
}
22
22
23
- /* U+007E is TILDE; convert to Shift-JIS 0x8160 (WAVE DASH) */
24
- $ fromUnicode ["\x00\x7E" ] = "\x81\x60" ;
23
+ /* According to the relevant Japanese Industrial Standards Committee (JISC) standards,
24
+ * SJIS 0x5C is a Yen sign, and 0x7E is an overline.
25
+ *
26
+ * However, this conflicts with the implementation of SJIS in various legacy
27
+ * software (notably Microsoft products), where SJIS 0x5C and 0x7E are taken
28
+ * as equivalent to the same ASCII bytes.
29
+ *
30
+ * Prior to PHP 8.1, mbstring's implementation of SJIS handled these bytes
31
+ * compatibly with Microsoft products. This was changed in PHP 8.1.0, in an
32
+ * attempt to comply with the JISC specifications. However, after discussion
33
+ * with various concerned Japanese developers, it seems that the historical
34
+ * behavior was more useful in the majority of applications which process
35
+ * SJIS-encoded text. */
36
+ $ validChars ["\x5C" ] = "\x00\x5C" ;
37
+ $ validChars ["\x7E" ] = "\x00\x7E" ;
38
+ $ fromUnicode ["\x00\x5C" ] = "\x5C" ;
39
+ $ fromUnicode ["\x00\x7E" ] = "\x7E" ;
40
+
41
+ /* That means it does not make sense to convert U+203E (OVERLINE)
42
+ * to 0x7E; convert it to JIS X 0208 FULLWIDTH MACRON instead */
43
+ $ fromUnicode ["\x20\x3E" ] = "\x81\x50" ;
44
+ /* U+00AF is MACRON; convert that to FULLWIDTH MACRON as well */
45
+ $ fromUnicode ["\x00\xAF" ] = "\x81\x50" ;
46
+ /* Since we are treating 0x5C as equivalent to U+005C, it does not
47
+ * make sense to convert U+00A5 (YEN SIGN) to 0x5C.
48
+ * Convert it to JIS X 0208 FULLWIDTH YEN SIGN instead */
49
+ $ fromUnicode ["\x00\xA5" ] = "\x81\x8F" ;
50
+
25
51
/* DEL character */
26
52
$ validChars ["\x7F" ] = "\x00\x7F" ;
27
53
$ fromUnicode ["\x00\x7F" ] = "\x7F" ;
28
- /* U+00AF is MACRON; Shift-JIS 0x7E is overline */
29
- $ fromUnicode ["\x00\xAF" ] = "\x7E" ;
30
54
/* Use fullwidth reverse solidus, not (halfwidth) backslash (0x5C) */
31
55
$ validChars ["\x81\x5F" ] = "\xFF\x3C" ;
32
56
$ fromUnicode ["\xFF\x3C" ] = "\x81\x5F" ;
0 commit comments