diff options
author | Nobuyoshi Nakada <[email protected]> | 2019-08-13 23:23:43 +0900 |
---|---|---|
committer | Nobuyoshi Nakada <[email protected]> | 2019-08-13 23:38:05 +0900 |
commit | 5b1bf8dd2d08ae7371ecf025967376bb794ed651 (patch) | |
tree | f94e66f07289b3244658bddf848f65c77b115427 | |
parent | 79f9c626b63c2ce6ed1f3e767838a02a668145ba (diff) |
UTF LE is determined by at least the first 2 bytes
* io.c (io_strip_bom): if the first 2 bytes are 0xFF 0xFE, the input
must be a little-endian UTF, either UTF-16LE or UTF-32LE. [Bug #16099]
-rw-r--r-- | io.c | 7 | ||||
-rw-r--r-- | test/ruby/test_file.rb | 2 | ||||
-rw-r--r-- | test/ruby/test_io_m17n.rb | 4 |
3 files changed, 5 insertions, 8 deletions
@@ -6136,12 +6136,9 @@ io_strip_bom(VALUE io)
                     return ENCINDEX_UTF_32LE;
                 }
                 rb_io_ungetbyte(io, b4);
-                rb_io_ungetbyte(io, b3);
-            }
-            else {
-                rb_io_ungetbyte(io, b3);
-                return ENCINDEX_UTF_16LE;
             }
+            rb_io_ungetbyte(io, b3);
+            return ENCINDEX_UTF_16LE;
         }
         rb_io_ungetbyte(io, b2);
         break;
diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb
index 36c154d36c..3deab76e93 100644
--- a/test/ruby/test_file.rb
+++ b/test/ruby/test_file.rb
@@ -87,7 +87,7 @@ class TestFile < Test::Unit::TestCase
   end
 
   def test_bom_32le
-    assert_bom(["\xFF\xFE\0", "\0"], __method__)
+    assert_bom(["\xFF", "\xFE\0\0"], __method__)
   end
 
   def test_truncate_wbuf
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 630f2eec08..8101bfb62f 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -2084,8 +2084,8 @@ EOT
     define_method("test_strip_bom:#{name}") do
       path = "#{name}-bom.txt"
       with_tmpdir {
-        text = "\uFEFFa"
-        stripped = "a"
+        text = "\uFEFF\u0100a"
+        stripped = "\u0100a"
         content = text.encode(name)
         generate_file(path, content)
         result = File.read(path, mode: 'rb:BOM|UTF-8')