summary refs log tree commit diff
diff options
context:
space:
mode:
author tomoya ishida <[email protected]> 2024-10-02 02:01:31 +0900
committer git <[email protected]> 2024-10-01 17:01:38 +0000
commit e320da60976f6818c8667afb98fe88142c3073d2 (patch)
tree 3b056188af6c69387799b656a1e6aebf972255d2
parent ec230ac6432ea89f1ee53d82a62337d4883dc83a (diff)
[ruby/reline] Fix Reline crash with invalid encoding history
(https://github.com/ruby/reline/pull/751) https://github.com/ruby/reline/commit/e9d4b37e34
-rw-r--r-- lib/reline/history.rb 6
-rw-r--r-- lib/reline/line_editor.rb 2
-rw-r--r-- lib/reline/unicode.rb 16
-rw-r--r-- test/reline/test_history.rb 9
-rw-r--r-- test/reline/test_unicode.rb 28
5 files changed, 57 insertions, 4 deletions
diff --git a/lib/reline/history.rb b/lib/reline/history.rb
index 3f3b65fea6..47c68ba774 100644
--- a/lib/reline/history.rb
+++ b/lib/reline/history.rb
@@ -19,7 +19,7 @@ class Reline::History < Array
def []=(index, val)
index = check_index(index)
- super(index, String.new(val, encoding: Reline.encoding_system_needs))
+ super(index, Reline::Unicode.safe_encode(val, Reline.encoding_system_needs))
end
def concat(*val)
@@ -45,7 +45,7 @@ class Reline::History < Array
end
end
super(*(val.map{ |v|
- String.new(v, encoding: Reline.encoding_system_needs)
+ Reline::Unicode.safe_encode(v, Reline.encoding_system_needs)
}))
end
@@ -56,7 +56,7 @@ class Reline::History < Array
if @config.history_size.positive?
shift if size + 1 > @config.history_size
end
- super(String.new(val, encoding: Reline.encoding_system_needs))
+ super(Reline::Unicode.safe_encode(val, Reline.encoding_system_needs))
end
private def check_index(index)
diff --git a/lib/reline/line_editor.rb b/lib/reline/line_editor.rb
index c71a5f79ee..56dc235c03 100644
--- a/lib/reline/line_editor.rb
+++ b/lib/reline/line_editor.rb
@@ -1325,7 +1325,7 @@ class Reline::LineEditor
save_old_buffer
pre = @buffer_of_lines[@line_index].byteslice(0, @byte_pointer)
post = @buffer_of_lines[@line_index].byteslice(@byte_pointer..)
- lines = (pre + text.gsub(/\r\n?/, "\n") + post).split("\n", -1)
+ lines = (pre + Reline::Unicode.safe_encode(text, @encoding).gsub(/\r\n?/, "\n") + post).split("\n", -1)
lines << '' if lines.empty?
@buffer_of_lines[@line_index, 1] = lines
@line_index += lines.size - 1
diff --git a/lib/reline/unicode.rb b/lib/reline/unicode.rb
index ef239d5e9e..0ec815aeea 100644
--- a/lib/reline/unicode.rb
+++ b/lib/reline/unicode.rb
@@ -54,6 +54,22 @@ class Reline::Unicode
}.join
end
+ def self.safe_encode(str, encoding)
+ # Reline only supports strings that are convertible to UTF-8.
+ converted = str.encode(encoding, invalid: :replace, undef: :replace)
+ return converted if str.encoding == Encoding::UTF_8 || converted.encoding == Encoding::UTF_8 || converted.ascii_only?
+
+ # This code is essentially doing the same thing as
+ # `str.encode(utf8, **replace_options).encode(encoding, **replace_options)`
+ # but also avoids unnecessary irreversible encoding conversion.
+ converted.gsub(/\X/) do |c|
+ c.encode(Encoding::UTF_8)
+ c
+ rescue Encoding::UndefinedConversionError
+ '?'
+ end
+ end
+
require 'reline/unicode/east_asian_width'
def self.get_mbchar_width(mbchar)
diff --git a/test/reline/test_history.rb b/test/reline/test_history.rb
index ddf8fb1472..ea902b0653 100644
--- a/test/reline/test_history.rb
+++ b/test/reline/test_history.rb
@@ -266,6 +266,15 @@ class Reline::History::Test < Reline::TestCase
assert_equal 5, history.size
end
+ def test_history_encoding_conversion
+ history = history_new
+ text1 = String.new("a\u{65535}b\xFFc", encoding: Encoding::UTF_8)
+ text2 = String.new("d\xFFe", encoding: Encoding::Shift_JIS)
+ history.push(text1.dup, text2.dup)
+ expected = [text1, text2].map { |s| s.encode(Reline.encoding_system_needs, invalid: :replace, undef: :replace) }
+ assert_equal(expected, history.to_a)
+ end
+
private
def history_new(history_size: 10)
diff --git a/test/reline/test_unicode.rb b/test/reline/test_unicode.rb
index deba4d4681..688d25e238 100644
--- a/test/reline/test_unicode.rb
+++ b/test/reline/test_unicode.rb
@@ -89,4 +89,32 @@ class Reline::Unicode::Test < Reline::TestCase
assert_equal ["\e[31mc\1ABC\2d\e[0mef", 2, 4], Reline::Unicode.take_mbchar_range("\e[31mabc\1ABC\2d\e[0mefghi", 2, 4)
assert_equal ["\e[41m \e[42mい\e[43m ", 1, 4], Reline::Unicode.take_mbchar_range("\e[41mあ\e[42mい\e[43mう", 1, 4, padding: true)
end
+
+ def test_encoding_conversion
+ texts = [
+ String.new("invalid\xFFutf8", encoding: 'utf-8'),
+ String.new("invalid\xFFsjis", encoding: 'sjis'),
+ "utf8#{33111.chr('sjis')}convertible",
+ "utf8#{33222.chr('sjis')}inconvertible",
+ "sjis->utf8->sjis#{60777.chr('sjis')}irreversible"
+ ]
+ utf8_texts = [
+ 'invalid�utf8',
+ 'invalid�sjis',
+ 'utf8仝convertible',
+ 'utf8�inconvertible',
+ 'sjis->utf8->sjis劦irreversible'
+ ]
+ sjis_texts = [
+ 'invalid?utf8',
+ 'invalid?sjis',
+ "utf8#{33111.chr('sjis')}convertible",
+ 'utf8?inconvertible',
+ "sjis->utf8->sjis#{60777.chr('sjis')}irreversible"
+ ]
+ assert_equal(utf8_texts, texts.map { |s| Reline::Unicode.safe_encode(s, 'utf-8') })
+ assert_equal(utf8_texts, texts.map { |s| Reline::Unicode.safe_encode(s, Encoding::UTF_8) })
+ assert_equal(sjis_texts, texts.map { |s| Reline::Unicode.safe_encode(s, 'sjis') })
+ assert_equal(sjis_texts, texts.map { |s| Reline::Unicode.safe_encode(s, Encoding::Windows_31J) })
+ end
end