1
1
# !./perl -w
2
2
3
- print " 1..76 \n " ;
3
+ print " 1..100 \n " ;
4
4
my $test = 0;
5
5
6
6
my %templates = (
@@ -13,7 +13,23 @@ sub bytes_to_utf {
13
13
my ($enc , $content , $do_bom ) = @_ ;
14
14
my $template = $templates {$enc };
15
15
die " Unsupported encoding $enc " unless $template ;
16
- return pack " $template *" , ($do_bom ? 0xFEFF : ()), unpack " U*" , $content ;
16
+ my @chars = unpack " U*" , $content ;
17
+ if ($enc ne ' utf8' ) {
18
+ # Make surrogate pairs
19
+ my @remember_that_utf_16_is_variable_length ;
20
+ foreach my $ord (@chars ) {
21
+ if ($ord < 0x10000) {
22
+ push @remember_that_utf_16_is_variable_length ,
23
+ $ord ;
24
+ } else {
25
+ $ord -= 0x10000;
26
+ push @remember_that_utf_16_is_variable_length ,
27
+ (0xD800 | ($ord >> 10)), (0xDC00 | ($ord & 0x3FF));
28
+ }
29
+ }
30
+ @chars = @remember_that_utf_16_is_variable_length ;
31
+ }
32
+ return pack " $template *" , ($do_bom ? 0xFEFF : ()), @chars ;
17
33
}
18
34
19
35
sub test {
@@ -45,8 +61,9 @@ for my $bom (0, 1) {
45
61
# right now, as here we're testing the input filter itself.
46
62
47
63
for my $expect (" N" , " \xFF " , " \x{100} " , " \x{010a} " , " \x{0a23} " ,
64
+ " \x{10000} " , " \x{64321} " , " \x{10FFFD} " ,
48
65
) {
49
- # A space so that the UTF-16 heuristc triggers - " '" gives two
66
+ # A space so that the UTF-16 heuristic triggers - " '" gives two
50
67
# characters of ASCII.
51
68
my $write = " '$expect '" ;
52
69
my $name = ' chrs ' . join ' , ' , map {ord $_ } split ' ' , $expect ;
0 commit comments