diff options
author | tomoya ishida <[email protected]> | 2024-10-02 02:01:31 +0900 |
---|---|---|
committer | git <[email protected]> | 2024-10-01 17:01:38 +0000 |
commit | e320da60976f6818c8667afb98fe88142c3073d2 (patch) | |
tree | 3b056188af6c69387799b656a1e6aebf972255d2 | |
parent | ec230ac6432ea89f1ee53d82a62337d4883dc83a (diff) |
[ruby/reline] Fix Reline crash with invalid encoding history
(https://github.com/ruby/reline/pull/751)
https://github.com/ruby/reline/commit/e9d4b37e34
-rw-r--r-- | lib/reline/history.rb | 6 | ||||
-rw-r--r-- | lib/reline/line_editor.rb | 2 | ||||
-rw-r--r-- | lib/reline/unicode.rb | 16 | ||||
-rw-r--r-- | test/reline/test_history.rb | 9 | ||||
-rw-r--r-- | test/reline/test_unicode.rb | 28 |
5 files changed, 57 insertions, 4 deletions
diff --git a/lib/reline/history.rb b/lib/reline/history.rb index 3f3b65fea6..47c68ba774 100644 --- a/lib/reline/history.rb +++ b/lib/reline/history.rb @@ -19,7 +19,7 @@ class Reline::History < Array def []=(index, val) index = check_index(index) - super(index, String.new(val, encoding: Reline.encoding_system_needs)) + super(index, Reline::Unicode.safe_encode(val, Reline.encoding_system_needs)) end def concat(*val) @@ -45,7 +45,7 @@ class Reline::History < Array end end super(*(val.map{ |v| - String.new(v, encoding: Reline.encoding_system_needs) + Reline::Unicode.safe_encode(v, Reline.encoding_system_needs) })) end @@ -56,7 +56,7 @@ class Reline::History < Array if @config.history_size.positive? shift if size + 1 > @config.history_size end - super(String.new(val, encoding: Reline.encoding_system_needs)) + super(Reline::Unicode.safe_encode(val, Reline.encoding_system_needs)) end private def check_index(index) diff --git a/lib/reline/line_editor.rb b/lib/reline/line_editor.rb index c71a5f79ee..56dc235c03 100644 --- a/lib/reline/line_editor.rb +++ b/lib/reline/line_editor.rb @@ -1325,7 +1325,7 @@ class Reline::LineEditor save_old_buffer pre = @buffer_of_lines[@line_index].byteslice(0, @byte_pointer) post = @buffer_of_lines[@line_index].byteslice(@byte_pointer..) - lines = (pre + text.gsub(/\r\n?/, "\n") + post).split("\n", -1) + lines = (pre + Reline::Unicode.safe_encode(text, @encoding).gsub(/\r\n?/, "\n") + post).split("\n", -1) lines << '' if lines.empty? @buffer_of_lines[@line_index, 1] = lines @line_index += lines.size - 1 diff --git a/lib/reline/unicode.rb b/lib/reline/unicode.rb index ef239d5e9e..0ec815aeea 100644 --- a/lib/reline/unicode.rb +++ b/lib/reline/unicode.rb @@ -54,6 +54,22 @@ class Reline::Unicode }.join end + def self.safe_encode(str, encoding) + # Reline only supports utf-8 convertible string. 
+ converted = str.encode(encoding, invalid: :replace, undef: :replace) + return converted if str.encoding == Encoding::UTF_8 || converted.encoding == Encoding::UTF_8 || converted.ascii_only? + + # This code is essentially doing the same thing as + # `str.encode(utf8, **replace_options).encode(encoding, **replace_options)` + # but also avoids unneccesary irreversible encoding conversion. + converted.gsub(/\X/) do |c| + c.encode(Encoding::UTF_8) + c + rescue Encoding::UndefinedConversionError + '?' + end + end + require 'reline/unicode/east_asian_width' def self.get_mbchar_width(mbchar) diff --git a/test/reline/test_history.rb b/test/reline/test_history.rb index ddf8fb1472..ea902b0653 100644 --- a/test/reline/test_history.rb +++ b/test/reline/test_history.rb @@ -266,6 +266,15 @@ class Reline::History::Test < Reline::TestCase assert_equal 5, history.size end + def test_history_encoding_conversion + history = history_new + text1 = String.new("a\u{65535}b\xFFc", encoding: Encoding::UTF_8) + text2 = String.new("d\xFFe", encoding: Encoding::Shift_JIS) + history.push(text1.dup, text2.dup) + expected = [text1, text2].map { |s| s.encode(Reline.encoding_system_needs, invalid: :replace, undef: :replace) } + assert_equal(expected, history.to_a) + end + private def history_new(history_size: 10) diff --git a/test/reline/test_unicode.rb b/test/reline/test_unicode.rb index deba4d4681..688d25e238 100644 --- a/test/reline/test_unicode.rb +++ b/test/reline/test_unicode.rb @@ -89,4 +89,32 @@ class Reline::Unicode::Test < Reline::TestCase assert_equal ["\e[31mc\1ABC\2d\e[0mef", 2, 4], Reline::Unicode.take_mbchar_range("\e[31mabc\1ABC\2d\e[0mefghi", 2, 4) assert_equal ["\e[41m \e[42mい\e[43m ", 1, 4], Reline::Unicode.take_mbchar_range("\e[41mあ\e[42mい\e[43mう", 1, 4, padding: true) end + + def test_encoding_conversion + texts = [ + String.new("invalid\xFFutf8", encoding: 'utf-8'), + String.new("invalid\xFFsjis", encoding: 'sjis'), + "utf8#{33111.chr('sjis')}convertible", + 
"utf8#{33222.chr('sjis')}inconvertible", + "sjis->utf8->sjis#{60777.chr('sjis')}irreversible" + ] + utf8_texts = [ + 'invalid�utf8', + 'invalid�sjis', + 'utf8仝convertible', + 'utf8�inconvertible', + 'sjis->utf8->sjis劦irreversible' + ] + sjis_texts = [ + 'invalid?utf8', + 'invalid?sjis', + "utf8#{33111.chr('sjis')}convertible", + 'utf8?inconvertible', + "sjis->utf8->sjis#{60777.chr('sjis')}irreversible" + ] + assert_equal(utf8_texts, texts.map { |s| Reline::Unicode.safe_encode(s, 'utf-8') }) + assert_equal(utf8_texts, texts.map { |s| Reline::Unicode.safe_encode(s, Encoding::UTF_8) }) + assert_equal(sjis_texts, texts.map { |s| Reline::Unicode.safe_encode(s, 'sjis') }) + assert_equal(sjis_texts, texts.map { |s| Reline::Unicode.safe_encode(s, Encoding::Windows_31J) }) + end end |