diff options
| author | Bruce Momjian | 2012-07-05 01:47:49 +0000 |
|---|---|---|
| committer | Bruce Momjian | 2012-07-05 01:47:49 +0000 |
| commit | 042d9ffc282a8c796d2a5babc600c1a6db150dac (patch) | |
| tree | 3204f89a7552bb9211aa9565f18367da150ffe60 /src/backend/utils/mb | |
| parent | d7c734841b3e6cb44de363a8a3d83c35b75b30d9 (diff) | |
Run newly-configured perltidy script on Perl files.
Run on HEAD and 9.2.
Diffstat (limited to 'src/backend/utils/mb')
| -rwxr-xr-x | src/backend/utils/mb/Unicode/UCS_to_BIG5.pl | 132 | ||||
| -rwxr-xr-x | src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl | 76 | ||||
| -rwxr-xr-x | src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl | 279 | ||||
| -rwxr-xr-x | src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl | 172 | ||||
| -rwxr-xr-x | src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl | 76 | ||||
| -rwxr-xr-x | src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl | 97 | ||||
| -rwxr-xr-x | src/backend/utils/mb/Unicode/UCS_to_GB18030.pl | 76 | ||||
| -rwxr-xr-x | src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl | 211 | ||||
| -rwxr-xr-x | src/backend/utils/mb/Unicode/UCS_to_SJIS.pl | 123 | ||||
| -rw-r--r-- | src/backend/utils/mb/Unicode/UCS_to_most.pl | 142 | ||||
| -rw-r--r-- | src/backend/utils/mb/Unicode/ucs2utf.pl | 38 |
11 files changed, 847 insertions, 575 deletions
diff --git a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl index b41e79703be..06d924853f3 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl @@ -33,68 +33,82 @@ require "ucs2utf.pl"; # $in_file = "BIG5.TXT"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); reset 'array'; -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + if ($array{$utf} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; - $array{ $utf } = $code; + $array{$utf} = $code; } } -close( FILE ); +close(FILE); $in_file = "CP950.TXT"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); # Pick only the ETEN extended characters in the range 0xf9d6 - 0xf9dc # from CP950.TXT - if( $code >= 0x80 && $ucs >= 0x0080 && - $code >= 0xf9d6 && $code <= 0xf9dc ){ + if ( $code >= 0x80 + && $ucs >= 0x0080 + && $code >= 0xf9d6 + && $code <= 0xf9dc) + { $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + if ($array{$utf} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; - $array{ $utf } = $code; + $array{$utf} = $code; } } -close( FILE ); +close(FILE); $file = lc("utf8_to_big5.map"); -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_utf_to_local ULmapBIG5[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $code = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $code; } } @@ -107,67 +121,81 @@ close(FILE); # $in_file = "BIG5.TXT"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); reset 'array'; -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + if ($array{$utf} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; - $array{ $code } = $utf; + $array{$code} = $utf; } } -close( FILE ); +close(FILE); $in_file = "CP950.TXT"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); # Pick only the ETEN extended characters in the range 0xf9d6 - 0xf9dc # from CP950.TXT - if( $code >= 0x80 && $ucs >= 0x0080 && - $code >= 0xf9d6 && $code <= 0xf9dc ){ + if ( $code >= 0x80 + && $ucs >= 0x0080 + && $code >= 0xf9d6 + && $code <= 0xf9dc) + { $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + if ($array{$utf} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; - $array{ $code } = $utf; + $array{$code} = $utf; } } -close( FILE ); +close(FILE); $file = lc("big5_to_utf8.map"); -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_local_to_utf LUmapBIG5[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $utf = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $utf; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; } } diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl index 0aa94c2b279..38c8ccd8806 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl @@ -22,43 +22,51 @@ require "ucs2utf.pl"; $in_file = "GB2312.TXT"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + if ($array{$utf} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; - $array{ $utf } = ($code | 0x8080); + $array{$utf} = ($code | 0x8080); } } -close( FILE ); +close(FILE); # # first, generate UTF8 --> EUC_CN table # $file = "utf8_to_euc_cn.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_utf_to_local ULmapEUC_CN[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $code = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $code; } } @@ -71,39 +79,47 @@ close(FILE); # reset 'array'; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate code: %04x\n",$ucs; + if ($array{$code} ne "") + { + printf STDERR "Warning: duplicate code: %04x\n", $ucs; next; } $count++; $code |= 0x8080; - $array{ $code } = $utf; + $array{$code} = $utf; } } -close( FILE ); +close(FILE); $file = "euc_cn_to_utf8.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_local_to_utf LUmapEUC_CN[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $utf = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $utf; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; } } diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl index 797f825e336..b381aa65720 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl @@ -15,89 +15,110 @@ $TEST = 1; $in_file = "euc-jis-2004-std.txt"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); reset 'array'; reset 'array1'; reset 'comment'; reset 'comment1'; -while($line = <FILE> ){ - if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u1 = $2; - $u2 = $3; - $rest = "U+" . $u1 . "+" . $u2 . $4; - $code = hex($c); - $ucs = hex($u1); - $utf1 = &ucs2utf($ucs); - $ucs = hex($u2); - $utf2 = &ucs2utf($ucs); - $str = sprintf "%08x%08x", $utf1, $utf2; - $array1{ $str } = $code; - $comment1{ $str } = $rest; +while ($line = <FILE>) +{ + if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) + { + $c = $1; + $u1 = $2; + $u2 = $3; + $rest = "U+" . $u1 . "+" . $u2 . $4; + $code = hex($c); + $ucs = hex($u1); + $utf1 = &ucs2utf($ucs); + $ucs = hex($u2); + $utf2 = &ucs2utf($ucs); + $str = sprintf "%08x%08x", $utf1, $utf2; + $array1{$str} = $code; + $comment1{$str} = $rest; $count1++; next; - } elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u = $2; + } + elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) + { + $c = $1; + $u = $2; $rest = "U+" . $u . $3; - } else { + } + else + { next; } - $ucs = hex($u); + $ucs = hex($u); $code = hex($c); - $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + $utf = &ucs2utf($ucs); + if ($array{$utf} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; - $array{ $utf } = $code; - $comment{ $code } = $rest; + $array{$utf} = $code; + $comment{$code} = $rest; } -close( FILE ); +close(FILE); $file = "utf8_to_euc_jis_2004.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "/*\n"; print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n"; print FILE " */\n"; print FILE "static pg_utf_to_local ULmapEUC_JIS_2004[] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $code = $array{$index}; $count--; - if( $count == 0 ){ - printf FILE " {0x%08x, 0x%06x} /* %s */\n", $index, $code, $comment{ $code }; - } else { - printf FILE " {0x%08x, 0x%06x}, /* %s */\n", $index, $code, $comment{ $code }; + if ($count == 0) + { + printf FILE " {0x%08x, 0x%06x} /* %s */\n", $index, $code, + $comment{$code}; + } + else + { + printf FILE " {0x%08x, 0x%06x}, /* %s */\n", $index, $code, + $comment{$code}; } } print FILE "};\n"; close(FILE); -if ($TEST == 1) { +if ($TEST == 1) +{ $file1 = "utf8.data"; $file2 = "euc_jis_2004.data"; - open( FILE1, "> $file1" ) || die( "cannot open $file1" ); - open( FILE2, "> $file2" ) || die( "cannot open $file2" ); - - for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; - if ($code > 0x00 && $code != 0x09 && $code != 0x0a && $code != 0x0d && - $code != 0x5c && - ($code < 0x80 || - ($code >= 0x8ea1 && $code <= 0x8efe) || - ($code >= 0x8fa1a1 && $code <= 0x8ffefe) || - ($code >= 0xa1a1 && $code <= 0x8fefe))) { - for ($i = 3; $i >= 0; $i--) { - $s = $i * 8; + open(FILE1, "> $file1") || die("cannot open $file1"); + open(FILE2, "> $file2") || die("cannot open $file2"); + + for $index (sort { $a <=> $b } keys(%array)) + { + $code = $array{$index}; + if ( $code > 0x00 + && $code != 0x09 + && $code != 0x0a + && $code != 0x0d + && $code != 0x5c + && ( $code < 0x80 + || ($code >= 0x8ea1 && $code <= 0x8efe) + || ($code >= 0x8fa1a1 && $code <= 0x8ffefe) + || ($code >= 0xa1a1 && $code <= 0x8fefe))) + { + for ($i = 3; $i >= 0; $i--) + { + $s = $i * 8; $mask = 0xff << $s; - print FILE1 pack("C", ($index & $mask) >> $s) if $index & $mask; + print FILE1 pack("C", ($index & $mask) >> $s) + if $index & $mask; print FILE2 pack("C", ($code & $mask) >> $s) if $code & $mask; } print FILE1 "\n"; @@ -107,46 +128,62 @@ if ($TEST == 1) { } $file = "utf8_to_euc_jis_2004_combined.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "/*\n"; print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n"; print FILE " */\n"; -print FILE "static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n"; +print FILE + "static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n"; -for $index ( sort {$a cmp $b} keys( %array1 ) ){ - $code = $array1{ $index }; +for $index (sort { $a cmp $b } keys(%array1)) +{ + $code = $array1{$index}; $count1--; - if( $count1 == 0 ){ - printf FILE " {0x%s, 0x%s, 0x%06x} /* %s */\n", substr($index, 0, 8), substr($index, 8, 8), $code, $comment1{ $index }; - } else { - printf FILE " {0x%s, 0x%s, 0x%06x}, /* %s */\n", substr($index, 0, 8), substr($index, 8, 8), $code, $comment1{ $index }; + if ($count1 == 0) + { + printf FILE " {0x%s, 0x%s, 0x%06x} /* %s */\n", substr($index, 0, 8), + substr($index, 8, 8), $code, $comment1{$index}; + } + else + { + printf FILE " {0x%s, 0x%s, 0x%06x}, /* %s */\n", + substr($index, 0, 8), substr($index, 8, 8), $code, + $comment1{$index}; } } print FILE "};\n"; close(FILE); -if ($TEST == 1) { - for $index ( sort {$a cmp $b} keys( %array1 ) ){ - $code = $array1{ $index }; - if ($code > 0x00 && $code != 0x09 && $code != 0x0a && $code != 0x0d && - $code != 0x5c && - ($code < 0x80 || - ($code >= 0x8ea1 && $code <= 0x8efe) || - ($code >= 0x8fa1a1 && $code <= 0x8ffefe) || - ($code >= 0xa1a1 && $code <= 0x8fefe))) { +if ($TEST == 1) +{ + for $index (sort { $a cmp $b } keys(%array1)) + { + $code = $array1{$index}; + if ( $code > 0x00 + && $code != 0x09 + && $code != 0x0a + && $code != 0x0d + && $code != 0x5c + && ( $code < 0x80 + || ($code >= 0x8ea1 && $code <= 0x8efe) + || ($code >= 0x8fa1a1 && $code <= 0x8ffefe) + || ($code >= 0xa1a1 && $code <= 0x8fefe))) + { $v1 = hex(substr($index, 0, 8)); $v2 = hex(substr($index, 8, 8)); - for ($i = 3; $i >= 0; $i--) { - $s = $i * 8; + for ($i = 3; $i >= 0; $i--) + { + $s = $i * 8; $mask = 0xff << $s; - print FILE1 pack("C", ($v1 & $mask) >> $s) if $v1 & $mask; + print FILE1 pack("C", ($v1 & $mask) >> $s) if $v1 & $mask; print FILE2 pack("C", ($code & $mask) >> $s) if $code & $mask; } - for ($i = 3; $i >= 0; $i--) { - $s = $i * 8; + for ($i = 3; $i >= 0; $i--) + { + $s = $i * 8; $mask = 0xff << $s; print FILE1 pack("C", ($v2 & $mask) >> $s) if $v2 & $mask; } @@ -162,65 +199,78 @@ if ($TEST == 1) { $in_file = "euc-jis-2004-std.txt"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); reset 'array'; reset 'array1'; reset 'comment'; reset 'comment1'; -while($line = <FILE> ){ - if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u1 = $2; - $u2 = $3; - $rest = "U+" . $u1 . "+" . $u2 . $4; - $code = hex($c); - $ucs = hex($u1); - $utf1 = &ucs2utf($ucs); - $ucs = hex($u2); - $utf2 = &ucs2utf($ucs); - $str = sprintf "%08x%08x", $utf1, $utf2; - $array1{ $code } = $str; - $comment1{ $code } = $rest; +while ($line = <FILE>) +{ + if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) + { + $c = $1; + $u1 = $2; + $u2 = $3; + $rest = "U+" . $u1 . "+" . $u2 . $4; + $code = hex($c); + $ucs = hex($u1); + $utf1 = &ucs2utf($ucs); + $ucs = hex($u2); + $utf2 = &ucs2utf($ucs); + $str = sprintf "%08x%08x", $utf1, $utf2; + $array1{$code} = $str; + $comment1{$code} = $rest; $count1++; next; - } elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u = $2; + } + elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) + { + $c = $1; + $u = $2; $rest = "U+" . $u . $3; - } else { + } + else + { next; } - $ucs = hex($u); + $ucs = hex($u); $code = hex($c); - $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + $utf = &ucs2utf($ucs); + if ($array{$code} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; - $array{ $code } = $utf; - $comment{ $utf } = $rest; + $array{$code} = $utf; + $comment{$utf} = $rest; } -close( FILE ); +close(FILE); $file = "euc_jis_2004_to_utf8.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "/*\n"; print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n"; print FILE " */\n"; print FILE "static pg_local_to_utf LUmapEUC_JIS_2004[] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $code = $array{$index}; $count--; - if( $count == 0 ){ - printf FILE " {0x%06x, 0x%08x} /* %s */\n", $index, $code, $comment{ $code }; - } else { - printf FILE " {0x%06x, 0x%08x}, /* %s */\n", $index, $code, $comment{ $code }; + if ($count == 0) + { + printf FILE " {0x%06x, 0x%08x} /* %s */\n", $index, $code, + $comment{$code}; + } + else + { + printf FILE " {0x%06x, 0x%08x}, /* %s */\n", $index, $code, + $comment{$code}; } } @@ -228,19 +278,26 @@ print FILE "};\n"; close(FILE); $file = "euc_jis_2004_to_utf8_combined.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "/*\n"; print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n"; print FILE " */\n"; -print FILE "static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n"; +print FILE + "static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n"; -for $index ( sort {$a <=> $b} keys( %array1 ) ){ - $code = $array1{ $index }; +for $index (sort { $a <=> $b } keys(%array1)) +{ + $code = $array1{$index}; $count1--; - if( $count1 == 0 ){ - printf FILE " {0x%06x, 0x%s, 0x%s} /* %s */\n", $index, substr($code, 0, 8), substr($code, 8, 8), $comment1{ $index }; - } else { - printf FILE " {0x%06x, 0x%s, 0x%s}, /* %s */\n", $index, substr($code, 0, 8), substr($code, 8, 8), $comment1{ $index }; + if ($count1 == 0) + { + printf FILE " {0x%06x, 0x%s, 0x%s} /* %s */\n", $index, + substr($code, 0, 8), substr($code, 8, 8), $comment1{$index}; + } + else + { + printf FILE " {0x%06x, 0x%s, 0x%s}, /* %s */\n", $index, + substr($code, 0, 8), substr($code, 8, 8), $comment1{$index}; } } diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl index 7c1292e3b8a..54632f168fd 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl @@ -36,102 +36,118 @@ require "ucs2utf.pl"; # $in_file = "JIS0201.TXT"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); reset 'array'; -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + if ($array{$utf} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; # add single shift 2 - $array{ $utf } = ($code | 0x8e00); + $array{$utf} = ($code | 0x8e00); } } -close( FILE ); +close(FILE); # # JIS0208 # $in_file = "JIS0208.TXT"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $s, $c, $u, $rest ) = split; - $ucs = hex($u); + ($s, $c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + if ($array{$utf} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; - $array{ $utf } = ($code | 0x8080); + $array{$utf} = ($code | 0x8080); } } -close( FILE ); +close(FILE); # # JIS0212 # $in_file = "JIS0212.TXT"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + if ($array{$utf} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; - $array{ $utf } = ($code | 0x8f8080); + $array{$utf} = ($code | 0x8f8080); } } -close( FILE ); +close(FILE); # # first, generate UTF8 --> EUC_JP table # $file = "utf8_to_euc_jp.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_utf_to_local ULmapEUC_JP[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $code = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $code; } } @@ -148,100 +164,116 @@ close(FILE); # $in_file = "JIS0201.TXT"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); reset 'array'; -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate code: %04x\n",$ucs; + if ($array{$code} ne "") + { + printf STDERR "Warning: duplicate code: %04x\n", $ucs; next; } $count++; # add single shift 2 $code |= 0x8e00; - $array{ $code } = $utf; + $array{$code} = $utf; } } -close( FILE ); +close(FILE); # # JIS0208 # $in_file = "JIS0208.TXT"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $s, $c, $u, $rest ) = split; - $ucs = hex($u); + ($s, $c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate code: %04x\n",$ucs; + if ($array{$code} ne "") + { + printf STDERR "Warning: duplicate code: %04x\n", $ucs; next; } $count++; $code |= 0x8080; - $array{ $code } = $utf; + $array{$code} = $utf; } } -close( FILE ); +close(FILE); # # JIS0212 # $in_file = "JIS0212.TXT"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate code: %04x\n",$ucs; + if ($array{$code} ne "") + { + printf STDERR "Warning: duplicate code: %04x\n", $ucs; next; } $count++; $code |= 0x8f8080; - $array{ $code } = $utf; + $array{$code} = $utf; } } -close( FILE ); +close(FILE); $file = "euc_jp_to_utf8.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_local_to_utf LUmapEUC_JP[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $utf = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $utf; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; } } diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl index 6825f5339ac..d0b22fcceb6 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl @@ -22,43 +22,51 @@ require "ucs2utf.pl"; $in_file = "KSX1001.TXT"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + if ($array{$utf} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; - $array{ $utf } = ($code | 0x8080); + $array{$utf} = ($code | 0x8080); } } -close( FILE ); +close(FILE); # # first, generate UTF8 --> EUC_KR table # $file = "utf8_to_euc_kr.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_utf_to_local ULmapEUC_KR[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $code = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $code; } } @@ -71,39 +79,47 @@ close(FILE); # reset 'array'; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate code: %04x\n",$ucs; + if ($array{$code} ne "") + { + printf STDERR "Warning: duplicate code: %04x\n", $ucs; next; } $count++; $code |= 0x8080; - $array{ $code } = $utf; + $array{$code} = $utf; } } -close( FILE ); +close(FILE); $file = "euc_kr_to_utf8.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_local_to_utf LUmapEUC_KR[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $utf = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $utf; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; } } diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl index dc406dc16c7..45cee78ed85 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl @@ -23,53 +23,66 @@ require "ucs2utf.pl"; $in_file = "CNS11643.TXT"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + if ($array{$utf} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; $plane = ($code & 0x1f0000) >> 16; - if ($plane > 16) { + if ($plane > 16) + { printf STDERR "Warning: invalid plane No.$plane. ignored\n"; next; } - if ($plane == 1) { - $array{ $utf } = (($code & 0xffff) | 0x8080); - } else { - $array{ $utf } = (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080); + if ($plane == 1) + { + $array{$utf} = (($code & 0xffff) | 0x8080); + } + else + { + $array{$utf} = + (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080); } } } -close( FILE ); +close(FILE); # # first, generate UTF8 --> EUC_TW table # $file = "utf8_to_euc_tw.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_utf_to_local ULmapEUC_TW[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $code = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $code; } } @@ -82,50 +95,60 @@ close(FILE); # reset 'array'; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate code: %04x\n",$ucs; + if ($array{$code} ne "") + { + printf STDERR "Warning: duplicate code: %04x\n", $ucs; next; } $count++; $plane = ($code & 0x1f0000) >> 16; - if ($plane > 16) { + if ($plane > 16) + { printf STDERR "Warning: invalid plane No.$plane. ignored\n"; next; } - if ($plane == 1) { + if ($plane == 1) + { $c = (($code & 0xffff) | 0x8080); - $array{ $c } = $utf; + $array{$c} = $utf; $count++; } $c = (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080); - $array{ $c } = $utf; + $array{$c} = $utf; } } -close( FILE ); +close(FILE); $file = "euc_tw_to_utf8.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_local_to_utf LUmapEUC_TW[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $utf = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $utf; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; } } diff --git a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl index 636a9620bd9..ef5dd81de75 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl @@ -18,28 +18,32 @@ require "ucs2utf.pl"; $in_file = "ISO10646-GB18030.TXT"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $u, $c, $rest ) = split; - $ucs = hex($u); + ($u, $c, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + if ($array{$utf} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; - $array{ $utf } = $code; + $array{$utf} = $code; } } -close( FILE ); +close(FILE); # @@ -47,15 +51,19 @@ close( FILE ); # $file = "utf8_to_gb18030.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_utf_to_local ULmapGB18030[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $code = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $code; } } @@ -69,38 +77,46 @@ close(FILE); # reset 'array'; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $u, $c, $rest ) = split; - $ucs = hex($u); + ($u, $c, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate code: %04x\n",$ucs; + if ($array{$code} ne "") + { + printf STDERR "Warning: duplicate code: %04x\n", $ucs; next; } $count++; - $array{ $code } = $utf; + $array{$code} = $utf; } } -close( FILE ); +close(FILE); $file = "gb18030_to_utf8.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_local_to_utf LUmapGB18030[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $utf = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $utf; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; } } diff --git a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl index b16cdb3321b..40735ed7e2e 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl @@ -13,65 +13,80 @@ require "ucs2utf.pl"; $in_file = "sjis-0213-2004-std.txt"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); reset 'array'; reset 'array1'; reset 'comment'; reset 'comment1'; -while($line = <FILE> ){ - if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u1 = $2; - $u2 = $3; - $rest = "U+" . $u1 . "+" . $u2 . $4; - $code = hex($c); - $ucs = hex($u1); - $utf1 = &ucs2utf($ucs); - $ucs = hex($u2); - $utf2 = &ucs2utf($ucs); - $str = sprintf "%08x%08x", $utf1, $utf2; - $array1{ $str } = $code; - $comment1{ $str } = $rest; +while ($line = <FILE>) +{ + if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) + { + $c = $1; + $u1 = $2; + $u2 = $3; + $rest = "U+" . $u1 . "+" . $u2 . $4; + $code = hex($c); + $ucs = hex($u1); + $utf1 = &ucs2utf($ucs); + $ucs = hex($u2); + $utf2 = &ucs2utf($ucs); + $str = sprintf "%08x%08x", $utf1, $utf2; + $array1{$str} = $code; + $comment1{$str} = $rest; $count1++; next; - } elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u = $2; + } + elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) + { + $c = $1; + $u = $2; $rest = "U+" . $u . $3; - } else { + } + else + { next; } - $ucs = hex($u); + $ucs = hex($u); $code = hex($c); - $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %08x UCS: %04x Shift JIS: %04x\n",$utf, $ucs, $code; + $utf = &ucs2utf($ucs); + if ($array{$utf} ne "") + { + printf STDERR + "Warning: duplicate UTF8: %08x UCS: %04x Shift JIS: %04x\n", $utf, + $ucs, $code; next; } $count++; - $array{ $utf } = $code; - $comment{ $code } = $rest; + $array{$utf} = $code; + $comment{$code} = $rest; } -close( FILE ); +close(FILE); $file = "utf8_to_shift_jis_2004.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "/*\n"; print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n"; print FILE " */\n"; print FILE "static pg_utf_to_local ULmapSHIFT_JIS_2004[] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $code = $array{$index}; $count--; - if( $count == 0 ){ - printf FILE " {0x%08x, 0x%06x} /* %s */\n", $index, $code, $comment{ $code }; - } else { - printf FILE " {0x%08x, 0x%06x}, /* %s */\n", $index, $code, $comment{ $code }; + if ($count == 0) + { + printf FILE " {0x%08x, 0x%06x} /* %s */\n", $index, $code, + $comment{$code}; + } + else + { + printf FILE " {0x%08x, 0x%06x}, /* %s */\n", $index, $code, + $comment{$code}; } } @@ -79,19 +94,27 @@ print FILE "};\n"; close(FILE); $file = "utf8_to_shift_jis_2004_combined.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "/*\n"; print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n"; print FILE " */\n"; -print FILE "static pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n"; +print FILE + "static pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n"; -for $index ( sort {$a cmp $b} keys( %array1 ) ){ - $code = $array1{ $index }; +for $index (sort { $a cmp $b } keys(%array1)) +{ + $code = $array1{$index}; $count1--; - if( $count1 == 0 ){ - printf FILE " {0x%s, 0x%s, 0x%04x} /* %s */\n", substr($index, 0, 8), substr($index, 8, 8), $code, $comment1{ $index }; - } else { - printf FILE " {0x%s, 0x%s, 0x%04x}, /* %s */\n", substr($index, 0, 8), substr($index, 8, 8), $code, $comment1{ $index }; + if ($count1 == 0) + { + printf FILE " {0x%s, 0x%s, 0x%04x} /* %s */\n", substr($index, 0, 8), + substr($index, 8, 8), $code, $comment1{$index}; + } + else + { + printf FILE " {0x%s, 0x%s, 0x%04x}, /* %s */\n", + substr($index, 0, 8), substr($index, 8, 8), $code, + $comment1{$index}; } } @@ -102,66 +125,81 @@ close(FILE); $in_file = "sjis-0213-2004-std.txt"; -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); reset 'array'; reset 'array1'; reset 'comment'; reset 'comment1'; -while($line = <FILE> ){ - if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u1 = $2; - $u2 = $3; - $rest = "U+" . $u1 . "+" . $u2 . $4; - $code = hex($c); - $ucs = hex($u1); - $utf1 = &ucs2utf($ucs); - $ucs = hex($u2); - $utf2 = &ucs2utf($ucs); - $str = sprintf "%08x%08x", $utf1, $utf2; - $array1{ $code } = $str; - $comment1{ $code } = $rest; +while ($line = <FILE>) +{ + if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) + { + $c = $1; + $u1 = $2; + $u2 = $3; + $rest = "U+" . $u1 . "+" . $u2 . $4; + $code = hex($c); + $ucs = hex($u1); + $utf1 = &ucs2utf($ucs); + $ucs = hex($u2); + $utf2 = &ucs2utf($ucs); + $str = sprintf "%08x%08x", $utf1, $utf2; + $array1{$code} = $str; + $comment1{$code} = $rest; $count1++; next; - } elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u = $2; + } + elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) + { + $c = $1; + $u = $2; $rest = "U+" . $u . $3; - } else { + } + else + { next; } - $ucs = hex($u); + $ucs = hex($u); $code = hex($c); - $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate UTF-8: %08x UCS: %04x Shift JIS: %04x\n",$utf, $ucs, $code; - printf STDERR "Previous value: UTF-8: %08x\n", $array{ $utf }; + $utf = &ucs2utf($ucs); + if ($array{$code} ne "") + { + printf STDERR + "Warning: duplicate UTF-8: %08x UCS: %04x Shift JIS: %04x\n", $utf, + $ucs, $code; + printf STDERR "Previous value: UTF-8: %08x\n", $array{$utf}; next; } $count++; - $array{ $code } = $utf; - $comment{ $utf } = $rest; + $array{$code} = $utf; + $comment{$utf} = $rest; } -close( FILE ); +close(FILE); $file = "shift_jis_2004_to_utf8.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "/*\n"; print FILE " * This file was generated by UCS_to_SHIFTJIS_2004.pl\n"; print FILE " */\n"; print FILE "static pg_local_to_utf LUmapSHIFT_JIS_2004[] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $code = $array{$index}; $count--; - if( $count == 0 ){ - printf FILE " {0x%04x, 0x%08x} /* %s */\n", $index, $code, $comment{ $code }; - } else { - printf FILE " {0x%04x, 0x%08x}, /* %s */\n", $index, $code, $comment{ $code }; + if ($count == 0) + { + printf FILE " {0x%04x, 0x%08x} /* %s */\n", $index, $code, + $comment{$code}; + } + else + { + printf FILE " {0x%04x, 0x%08x}, /* %s */\n", $index, $code, + $comment{$code}; } } @@ -169,19 +207,26 @@ print FILE "};\n"; close(FILE); $file = "shift_jis_2004_to_utf8_combined.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "/*\n"; print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n"; print FILE " */\n"; -print FILE "static pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n"; +print FILE + "static pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n"; -for $index ( sort {$a <=> $b} keys( %array1 ) ){ - $code = $array1{ $index }; +for $index (sort { $a <=> $b } keys(%array1)) +{ + $code = $array1{$index}; $count1--; - if( $count1 == 0 ){ - printf FILE " {0x%04x, 0x%s, 0x%s} /* %s */\n", $index, substr($code, 0, 8), substr($code, 8, 8), $comment1{ $index }; - } else { - printf FILE " {0x%04x, 0x%s, 0x%s}, /* %s */\n", $index, substr($code, 0, 8), substr($code, 8, 8), $comment1{ $index }; + if ($count1 == 0) + { + printf FILE " {0x%04x, 0x%s, 0x%s} /* %s */\n", $index, + substr($code, 0, 8), substr($code, 8, 8), $comment1{$index}; + } + else + { + printf FILE " {0x%04x, 0x%s, 0x%s}, /* %s */\n", $index, + substr($code, 0, 8), substr($code, 8, 8), $comment1{$index}; } } diff --git a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl index 510350e6b53..f93ca7af302 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl @@ -22,60 +22,68 @@ require "ucs2utf.pl"; # first generate UTF-8 --> SJIS table $in_file = "CP932.TXT"; -$count = 0; +$count = 0; + +open(FILE, $in_file) || die("cannot open $in_file"); -open( FILE, $in_file ) || die( "cannot open $in_file" ); - -while( <FILE> ){ - chop; - if( /^#/ ){ - next; - } - ( $c, $u, $rest ) = split; - $ucs = hex($u); - $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ - $utf = &ucs2utf($ucs); - if((( $code >= 0xed40 ) - && ( $code <= 0xeefc )) - || (( $code >= 0x8754 ) - &&( $code <= 0x875d )) - || ( $code == 0x878a ) - || ( $code == 0x8782 ) - || ( $code == 0x8784 ) - || ( $code == 0xfa5b ) - || ( $code == 0xfa54 ) - || (( $code >= 0x8790 ) - && ( $code <= 0x8792 )) - || (( $code >= 0x8795 ) - && ( $code <= 0x8797 )) - || (( $code >= 0x879a ) - && ( $code <= 0x879c ))) - { - printf STDERR "Warning: duplicate UTF8 : UCS=0x%04x SJIS=0x%04x\n",$ucs,$code; - next; - } - $count++; - $array{ $utf } = $code; - } +while (<FILE>) +{ + chop; + if (/^#/) + { + next; + } + ($c, $u, $rest) = split; + $ucs = hex($u); + $code = hex($c); + if ($code >= 0x80 && $ucs >= 0x0080) + { + $utf = &ucs2utf($ucs); + if ((($code >= 0xed40) && ($code <= 0xeefc)) + || ( ($code >= 0x8754) + && ($code <= 0x875d)) + || ($code == 0x878a) + || ($code == 0x8782) + || ($code == 0x8784) + || ($code == 0xfa5b) + || ($code == 0xfa54) + || ( ($code >= 0x8790) + && ($code <= 0x8792)) + || ( ($code >= 0x8795) + && ($code <= 0x8797)) + || ( ($code >= 0x879a) + && ($code <= 0x879c))) + { + printf STDERR + "Warning: duplicate UTF8 : UCS=0x%04x SJIS=0x%04x\n", $ucs, + $code; + next; + } + $count++; + $array{$utf} = $code; + } } -close( FILE ); +close(FILE); # # first, generate UTF8 --> SJIS table # $file = "utf8_to_sjis.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_utf_to_local ULmapSJIS[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $code = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $code; } } @@ -87,37 +95,44 @@ close(FILE); # then generate SJIS --> UTF8 table # -open( FILE, $in_file ) || die( "cannot open $in_file" ); +open(FILE, $in_file) || die("cannot open $in_file"); reset 'array'; $count = 0; -while( <FILE> ){ +while (<FILE>) +{ chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080 ){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); $count++; - $array{ $code } = $utf; + $array{$code} = $utf; } } -close( FILE ); +close(FILE); $file = "sjis_to_utf8.map"; -open( FILE, "> $file" ) || die( "cannot open $file" ); +open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_local_to_utf LUmapSJIS[ $count ] = {\n"; -for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; +for $index (sort { $a <=> $b } keys(%array)) +{ + $utf = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $utf; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; } } diff --git a/src/backend/utils/mb/Unicode/UCS_to_most.pl b/src/backend/utils/mb/Unicode/UCS_to_most.pl index b67c7943e63..bd031f79a06 100644 --- a/src/backend/utils/mb/Unicode/UCS_to_most.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_most.pl @@ -18,80 +18,88 @@ require "ucs2utf.pl"; %filename = ( - 'WIN866' => 'CP866.TXT', - 'WIN874' => 'CP874.TXT', - 'WIN1250' => 'CP1250.TXT', - 'WIN1251' => 'CP1251.TXT', - 'WIN1252' => 'CP1252.TXT', - 'WIN1253' => 'CP1253.TXT', - 'WIN1254' => 'CP1254.TXT', - 'WIN1255' => 'CP1255.TXT', - 'WIN1256' => 'CP1256.TXT', - 'WIN1257' => 'CP1257.TXT', - 'WIN1258' => 'CP1258.TXT', - 'ISO8859_2' => '8859-2.TXT', - 'ISO8859_3' => '8859-3.TXT', - 'ISO8859_4' => '8859-4.TXT', - 'ISO8859_5' => '8859-5.TXT', - 'ISO8859_6' => '8859-6.TXT', - 'ISO8859_7' => '8859-7.TXT', - 'ISO8859_8' => '8859-8.TXT', - 'ISO8859_9' => '8859-9.TXT', + 'WIN866' => 'CP866.TXT', + 'WIN874' => 'CP874.TXT', + 'WIN1250' => 'CP1250.TXT', + 'WIN1251' => 'CP1251.TXT', + 'WIN1252' => 'CP1252.TXT', + 'WIN1253' => 'CP1253.TXT', + 'WIN1254' => 'CP1254.TXT', + 'WIN1255' => 'CP1255.TXT', + 'WIN1256' => 'CP1256.TXT', + 'WIN1257' => 'CP1257.TXT', + 'WIN1258' => 'CP1258.TXT', + 'ISO8859_2' => '8859-2.TXT', + 'ISO8859_3' => '8859-3.TXT', + 'ISO8859_4' => '8859-4.TXT', + 'ISO8859_5' => '8859-5.TXT', + 'ISO8859_6' => '8859-6.TXT', + 'ISO8859_7' => '8859-7.TXT', + 'ISO8859_8' => '8859-8.TXT', + 'ISO8859_9' => '8859-9.TXT', 'ISO8859_10' => '8859-10.TXT', 'ISO8859_13' => '8859-13.TXT', 'ISO8859_14' => '8859-14.TXT', 'ISO8859_15' => '8859-15.TXT', 'ISO8859_16' => '8859-16.TXT', - 'KOI8R' => 'KOI8-R.TXT', - 'KOI8U' => 'KOI8-U.TXT', - 'GBK' => 'CP936.TXT', - 'UHC' => 'CP949.TXT', - 'JOHAB' => 'JOHAB.TXT', -); + 'KOI8R' => 'KOI8-R.TXT', + 'KOI8U' => 'KOI8-U.TXT', + 'GBK' => 'CP936.TXT', + 'UHC' => 'CP949.TXT', + 'JOHAB' => 'JOHAB.TXT',); @charsets = keys(filename); @charsets = @ARGV if scalar(@ARGV); -foreach $charset (@charsets) { +foreach $charset (@charsets) +{ -# -# first, generate UTF8-> charset table -# - $in_file = $filename{$charset}; + # + # first, generate UTF8-> charset table + # + $in_file = $filename{$charset}; - open( FILE, $in_file ) || die( "cannot open $in_file" ); + open(FILE, $in_file) || die("cannot open $in_file"); reset 'array'; - while( <FILE> ){ + while (<FILE>) + { chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if( $code >= 0x80 && $ucs >= 0x0080){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $utf } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + if ($array{$utf} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; - $array{ $utf } = $code; + $array{$utf} = $code; } } - close( FILE ); + close(FILE); $file = lc("utf8_to_${charset}.map"); - open( FILE, "> $file" ) || die( "cannot open $file" ); + open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_utf_to_local ULmap${charset}[ $count ] = {\n"; - for $index ( sort {$a <=> $b} keys( %array ) ){ - $code = $array{ $index }; + for $index (sort { $a <=> $b } keys(%array)) + { + $code = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $code; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $code; } } @@ -99,42 +107,50 @@ foreach $charset (@charsets) { print FILE "};\n"; close(FILE); -# -# then generate character set code ->UTF8 table -# - open( FILE, $in_file ) || die( "cannot open $in_file" ); + # + # then generate character set code ->UTF8 table + # + open(FILE, $in_file) || die("cannot open $in_file"); reset 'array'; - while( <FILE> ){ + while (<FILE>) + { chop; - if( /^#/ ){ + if (/^#/) + { next; } - ( $c, $u, $rest ) = split; - $ucs = hex($u); + ($c, $u, $rest) = split; + $ucs = hex($u); $code = hex($c); - if($code >= 0x80 && $ucs >= 0x0080){ + if ($code >= 0x80 && $ucs >= 0x0080) + { $utf = &ucs2utf($ucs); - if( $array{ $code } ne "" ){ - printf STDERR "Warning: duplicate UTF8: %04x\n",$ucs; + if ($array{$code} ne "") + { + printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs; next; } $count++; - $array{ $code } = $utf; + $array{$code} = $utf; } } - close( FILE ); + close(FILE); $file = lc("${charset}_to_utf8.map"); - open( FILE, "> $file" ) || die( "cannot open $file" ); + open(FILE, "> $file") || die("cannot open $file"); print FILE "static pg_local_to_utf LUmap${charset}[ $count ] = {\n"; - for $index ( sort {$a <=> $b} keys( %array ) ){ - $utf = $array{ $index }; + for $index (sort { $a <=> $b } keys(%array)) + { + $utf = $array{$index}; $count--; - if( $count == 0 ){ + if ($count == 0) + { printf FILE " {0x%04x, 0x%04x}\n", $index, $utf; - } else { + } + else + { printf FILE " {0x%04x, 0x%04x},\n", $index, $utf; } } diff --git a/src/backend/utils/mb/Unicode/ucs2utf.pl b/src/backend/utils/mb/Unicode/ucs2utf.pl index 083dd8ac06b..48d2b69616a 100644 --- a/src/backend/utils/mb/Unicode/ucs2utf.pl +++ b/src/backend/utils/mb/Unicode/ucs2utf.pl @@ -4,24 +4,32 @@ # src/backend/utils/mb/Unicode/ucs2utf.pl # convert UCS-4 to UTF-8 # -sub ucs2utf { - local($ucs) = @_; +sub ucs2utf +{ + local ($ucs) = @_; local $utf; - if ($ucs <= 0x007f) { + if ($ucs <= 0x007f) + { $utf = $ucs; - } elsif ($ucs > 0x007f && $ucs <= 0x07ff) { + } + elsif ($ucs > 0x007f && $ucs <= 0x07ff) + { $utf = (($ucs & 0x003f) | 0x80) | ((($ucs >> 6) | 0xc0) << 8); - } elsif ($ucs > 0x07ff && $ucs <= 0xffff) { - $utf = ((($ucs >> 12) | 0xe0) << 16) | - (((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | - (($ucs & 0x003f) | 0x80); - } else { - $utf = ((($ucs >> 18) | 0xf0) << 24) | - (((($ucs & 0x3ffff) >> 12) | 0x80) << 16) | - (((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | - (($ucs & 0x003f) | 0x80); - } - return($utf); + } + elsif ($ucs > 0x07ff && $ucs <= 0xffff) + { + $utf = + ((($ucs >> 12) | 0xe0) << 16) | + (((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | (($ucs & 0x003f) | 0x80); + } + else + { + $utf = + ((($ucs >> 18) | 0xf0) << 24) | + (((($ucs & 0x3ffff) >> 12) | 0x80) << 16) | + (((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | (($ucs & 0x003f) | 0x80); + } + return ($utf); } 1; |
