diff options
author | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-02-06 13:31:07 +0000 |
---|---|---|
committer | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-02-06 13:31:07 +0000 |
commit | 8e46f401b299eb314ea2d3637e555eb04f968f8b (patch) | |
tree | 4b0511e6f863a78341c09effc838c534b06b8401 | |
parent | 16e613fcc3b8219fc8fb80ca0928660ef174c406 (diff) |
ASCII-incompatible escape
* lib/cgi/util.rb (escapeHTML, unescapeHTML): consider
ASCII-incompatible encodings. [Fix GH-1239]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53754 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | lib/cgi/util.rb | 39 | ||||
-rw-r--r-- | test/cgi/test_cgi_util.rb | 16 |
3 files changed, 50 insertions, 10 deletions
@@ -1,3 +1,8 @@ +Sat Feb 6 22:30:57 2016 Nobuyoshi Nakada <[email protected]> + + * lib/cgi/util.rb (escapeHTML, unescapeHTML): consider + ASCII-incompatible encodings. [Fix GH-1239] + Sat Feb 6 20:44:24 2016 Nobuyoshi Nakada <[email protected]> * configure.in: check __int64_t and __int128_t for RUBY_DEFINT on diff --git a/lib/cgi/util.rb b/lib/cgi/util.rb index 83c310b3cb..d2657ab981 100644 --- a/lib/cgi/util.rb +++ b/lib/cgi/util.rb @@ -35,6 +35,18 @@ module CGI::Util # CGI::escapeHTML('Usage: foo "bar" <baz>') # # => "Usage: foo &quot;bar&quot; &lt;baz&gt;" def escapeHTML(string) + enc = string.encoding + unless enc.ascii_compatible? + if enc.dummy? + origenc = enc + enc = Encoding::Converter.asciicompat_encoding(enc) + string = enc ? string.encode(enc) : string.b + end + table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}] + string = string.gsub(/#{"['&\"<>]".encode(enc)}/, table) + string.encode!(origenc) if origenc + return string + end string.gsub(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__) end @@ -47,10 +59,14 @@ module CGI::Util # CGI::unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;") # # => "Usage: foo \"bar\" <baz>" def unescapeHTML(string) - return string unless string.include? '&' enc = string.encoding - if enc != Encoding::UTF_8 && [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].include?(enc) - return string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do + unless enc.ascii_compatible? + if enc.dummy? + origenc = enc + enc = Encoding::Converter.asciicompat_encoding(enc) + string = enc ?
string.encode(enc) : string.b + end + string = string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do case $1.encode(Encoding::US_ASCII) when 'apos' then "'".encode(enc) when 'amp' then '&'.encode(enc) @@ -61,8 +77,15 @@ module CGI::Util when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc) end end + string.encode!(origenc) if origenc + return string end - asciicompat = Encoding.compatible?(string, "a") + return string unless string.include? '&' + charlimit = case enc + when Encoding::UTF_8; 0x10ffff + when Encoding::ISO_8859_1; 256 + else 128 + end string.gsub(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do match = $1.dup case match @@ -73,18 +96,14 @@ module CGI::Util when 'lt' then '<' when /\A#0*(\d+)\z/ n = $1.to_i - if enc == Encoding::UTF_8 or - enc == Encoding::ISO_8859_1 && n < 256 or - asciicompat && n < 128 + if n < charlimit n.chr(enc) else "&##{$1};" end when /\A#x([0-9a-f]+)\z/i n = $1.hex - if enc == Encoding::UTF_8 or - enc == Encoding::ISO_8859_1 && n < 256 or - asciicompat && n < 128 + if n < charlimit n.chr(enc) else "&#x#{$1};" diff --git a/test/cgi/test_cgi_util.rb b/test/cgi/test_cgi_util.rb index 5565afe9c1..f67817fce6 100644 --- a/test/cgi/test_cgi_util.rb +++ b/test/cgi/test_cgi_util.rb @@ -98,6 +98,22 @@ class CGIUtilTest < Test::Unit::TestCase assert_equal("'&\"><", CGI::unescapeHTML("&#39;&amp;&quot;&gt;&lt;")) end + Encoding.list.each do |enc| + begin + escaped = "&#39;&amp;&quot;&gt;&lt;".encode(enc) + unescaped = "'&\"><".encode(enc) + rescue Encoding::ConverterNotFoundError + next + else + define_method("test_cgi_escapeHTML:#{enc.name}") do + assert_equal(escaped, CGI::escapeHTML(unescaped)) + end + define_method("test_cgi_unescapeHTML:#{enc.name}") do + assert_equal(unescaped, CGI::unescapeHTML(escaped)) + end + end + end + def test_cgi_unescapeHTML_uppercasecharacter assert_equal("\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86", CGI::unescapeHTML("&#x3042;&#X3044;&#X3046;")) end |