diff options
author | KJ Tsanaktsidis <[email protected]> | 2023-12-28 16:08:54 +1100 |
---|---|---|
committer | KJ Tsanaktsidis <[email protected]> | 2024-01-10 21:02:23 +1100 |
commit | 31371b2e24b03ccb0a03b622faf8c65e6cf6a31a (patch) | |
tree | e419cf38ee171aaf893dd45dcc80ebff5e051cd5 /test | |
parent | c4051d5f4324536a932e99cbe43f5b7dbe34254a (diff) |
Fix CRLF -> LF conversion on read for rb_io_fdopen & rb_file_open
When opening a file with `File.open`, and then setting the encoding with
`IO#set_encoding`, it still correctly performs CRLF -> LF conversion on
Windows when reading files with a CRLF line ending in them (in text
mode).
However, when the file is instead opened with either the `rb_io_fdopen` or
`rb_file_open` APIs from C, the CRLF conversion is _NOT_ set up
correctly; it works if the encoding is not specified, but if
`IO#set_encoding` is called, the conversion stops happening. This seems
to be because the encflags never get ECONV_DEFAULT_NEWLINE_DECORATOR
set in these codepaths.
Concretely, this means that the conversion doesn't happen in the
following circumstances:
* When loading ruby files with require (that calls rb_io_fdopen)
* When parsing ruby files with RubyVM::AbstractSyntaxTree (that calls
rb_file_open).
This then causes the ErrorHighlight tests to fail on Windows if git has
checked them out with CRLF line endings - the error messages it's
testing wind up with literal \r\n sequences in them because the iseq
text from the parser contains un-newline-converted strings.
This commit fixes the problem by copy-pasting the relevant snippet which
sets this up in `rb_io_extract_modeenc` (for the File.open path) into
the relevant codepaths for `rb_io_fdopen` and `rb_file_open`.
[Bug #20101]
Diffstat (limited to 'test')
-rw-r--r-- | test/ruby/test_file.rb | 246 |
1 files changed, 246 insertions, 0 deletions
diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb index 409d21fc4e..aa10566bfa 100644 --- a/test/ruby/test_file.rb +++ b/test/ruby/test_file.rb @@ -554,4 +554,250 @@ class TestFile < Test::Unit::TestCase assert_file.absolute_path?("/foo/bar\\baz") end end + + class NewlineConvTests < Test::Unit::TestCase + TEST_STRING_WITH_CRLF = "line1\r\nline2\r\n".freeze + TEST_STRING_WITH_LF = "line1\nline2\n".freeze + + def setup + @tmpdir = Dir.mktmpdir(self.class.name) + @read_path_with_crlf = File.join(@tmpdir, "read_path_with_crlf") + File.binwrite(@read_path_with_crlf, TEST_STRING_WITH_CRLF) + @read_path_with_lf = File.join(@tmpdir, "read_path_with_lf") + File.binwrite(@read_path_with_lf, TEST_STRING_WITH_LF) + @write_path = File.join(@tmpdir, "write_path") + File.binwrite(@write_path, '') + end + + def teardown + FileUtils.rm_rf @tmpdir + end + + def windows? + /cygwin|mswin|mingw/ =~ RUBY_PLATFORM + end + + def open_file_with(method, filename, mode) + read_or_write = mode.include?('w') ? :write : :read + binary_or_text = mode.include?('b') ? :binary : :text + + f = case method + when :ruby_file_open + File.open(filename, mode) + when :c_rb_file_open + Bug::File::NewlineConv.rb_file_open(filename, read_or_write, binary_or_text) + when :c_rb_io_fdopen + Bug::File::NewlineConv.rb_io_fdopen(filename, read_or_write, binary_or_text) + else + raise "Don't know how to open with #{method}" + end + + begin + yield f + ensure + f.close + end + end + + def assert_file_contents_has_lf(f) + assert_equal TEST_STRING_WITH_LF, f.read + end + + def assert_file_contents_has_crlf(f) + assert_equal TEST_STRING_WITH_CRLF, f.read + end + + def assert_file_contents_has_lf_on_windows(f) + if windows? + assert_file_contents_has_lf(f) + else + assert_file_contents_has_crlf(f) + end + end + + def assert_file_contents_has_crlf_on_windows(f) + if windows? 
+ assert_file_contents_has_crlf(f) + else + assert_file_contents_has_lf(f) + end + end + + def test_ruby_file_open_text_mode_read_crlf + open_file_with(:ruby_file_open, @read_path_with_crlf, 'r') { |f| assert_file_contents_has_lf_on_windows(f) } + end + + def test_ruby_file_open_bin_mode_read_crlf + open_file_with(:ruby_file_open, @read_path_with_crlf, 'rb') { |f| assert_file_contents_has_crlf(f) } + end + + def test_ruby_file_open_text_mode_read_lf + open_file_with(:ruby_file_open, @read_path_with_lf, 'r') { |f| assert_file_contents_has_lf(f) } + end + + def test_ruby_file_open_bin_mode_read_lf + open_file_with(:ruby_file_open, @read_path_with_lf, 'rb') { |f| assert_file_contents_has_lf(f) } + end + + def test_ruby_file_open_text_mode_read_crlf_with_utf8_encoding + open_file_with(:ruby_file_open, @read_path_with_crlf, 'r') do |f| + f.set_encoding Encoding::UTF_8, '-' + assert_file_contents_has_lf_on_windows(f) + end + end + + def test_ruby_file_open_bin_mode_read_crlf_with_utf8_encoding + open_file_with(:ruby_file_open, @read_path_with_crlf, 'rb') do |f| + f.set_encoding Encoding::UTF_8, '-' + assert_file_contents_has_crlf(f) + end + end + + def test_ruby_file_open_text_mode_read_lf_with_utf8_encoding + open_file_with(:ruby_file_open, @read_path_with_lf, 'r') do |f| + f.set_encoding Encoding::UTF_8, '-' + assert_file_contents_has_lf(f) + end + end + + def test_ruby_file_open_bin_mode_read_lf_with_utf8_encoding + open_file_with(:ruby_file_open, @read_path_with_lf, 'rb') do |f| + f.set_encoding Encoding::UTF_8, '-' + assert_file_contents_has_lf(f) + end + end + + def test_ruby_file_open_text_mode_write_lf + open_file_with(:ruby_file_open, @write_path, 'w') { |f| f.write TEST_STRING_WITH_LF } + File.open(@write_path, 'rb') { |f| assert_file_contents_has_crlf_on_windows(f) } + end + + def test_ruby_file_open_bin_mode_write_lf + open_file_with(:ruby_file_open, @write_path, 'wb') { |f| f.write TEST_STRING_WITH_LF } + File.open(@write_path, 'rb') { |f| 
assert_file_contents_has_lf(f) } + end + + def test_ruby_file_open_bin_mode_write_crlf + open_file_with(:ruby_file_open, @write_path, 'wb') { |f| f.write TEST_STRING_WITH_CRLF } + File.open(@write_path, 'rb') { |f| assert_file_contents_has_crlf(f) } + end + + def test_c_rb_file_open_text_mode_read_crlf + open_file_with(:c_rb_file_open, @read_path_with_crlf, 'r') { |f| assert_file_contents_has_lf_on_windows(f) } + end + + def test_c_rb_file_open_bin_mode_read_crlf + open_file_with(:c_rb_file_open, @read_path_with_crlf, 'rb') { |f| assert_file_contents_has_crlf(f) } + end + + def test_c_rb_file_open_text_mode_read_lf + open_file_with(:c_rb_file_open, @read_path_with_lf, 'r') { |f| assert_file_contents_has_lf(f) } + end + + def test_c_rb_file_open_bin_mode_read_lf + open_file_with(:c_rb_file_open, @read_path_with_lf, 'rb') { |f| assert_file_contents_has_lf(f) } + end + + def test_c_rb_file_open_text_mode_write_lf + open_file_with(:c_rb_file_open, @write_path, 'w') { |f| f.write TEST_STRING_WITH_LF } + File.open(@write_path, 'rb') { |f| assert_file_contents_has_crlf_on_windows(f) } + end + + def test_c_rb_file_open_bin_mode_write_lf + open_file_with(:c_rb_file_open, @write_path, 'wb') { |f| f.write TEST_STRING_WITH_LF } + File.open(@write_path, 'rb') { |f| assert_file_contents_has_lf(f) } + end + + def test_c_rb_file_open_bin_mode_write_crlf + open_file_with(:c_rb_file_open, @write_path, 'wb') { |f| f.write TEST_STRING_WITH_CRLF } + File.open(@write_path, 'rb') { |f| assert_file_contents_has_crlf(f) } + end + + def test_c_rb_file_open_text_mode_read_crlf_with_utf8_encoding + open_file_with(:c_rb_file_open, @read_path_with_crlf, 'r') do |f| + f.set_encoding Encoding::UTF_8, '-' + assert_file_contents_has_lf_on_windows(f) + end + end + + def test_c_rb_file_open_bin_mode_read_crlf_with_utf8_encoding + open_file_with(:c_rb_file_open, @read_path_with_crlf, 'rb') do |f| + f.set_encoding Encoding::UTF_8, '-' + assert_file_contents_has_crlf(f) + end + end + + def 
test_c_rb_file_open_text_mode_read_lf_with_utf8_encoding + open_file_with(:c_rb_file_open, @read_path_with_lf, 'r') do |f| + f.set_encoding Encoding::UTF_8, '-' + assert_file_contents_has_lf(f) + end + end + + def test_c_rb_file_open_bin_mode_read_lf_with_utf8_encoding + open_file_with(:c_rb_file_open, @read_path_with_lf, 'rb') do |f| + f.set_encoding Encoding::UTF_8, '-' + assert_file_contents_has_lf(f) + end + end + + def test_c_rb_io_fdopen_text_mode_read_crlf + open_file_with(:c_rb_io_fdopen, @read_path_with_crlf, 'r') { |f| assert_file_contents_has_lf_on_windows(f) } + end + + def test_c_rb_io_fdopen_bin_mode_read_crlf + open_file_with(:c_rb_io_fdopen, @read_path_with_crlf, 'rb') { |f| assert_file_contents_has_crlf(f) } + end + + def test_c_rb_io_fdopen_text_mode_read_lf + open_file_with(:c_rb_io_fdopen, @read_path_with_lf, 'r') { |f| assert_file_contents_has_lf(f) } + end + + def test_c_rb_io_fdopen_bin_mode_read_lf + open_file_with(:c_rb_io_fdopen, @read_path_with_lf, 'rb') { |f| assert_file_contents_has_lf(f) } + end + + def test_c_rb_io_fdopen_text_mode_write_lf + open_file_with(:c_rb_io_fdopen, @write_path, 'w') { |f| f.write TEST_STRING_WITH_LF } + File.open(@write_path, 'rb') { |f| assert_file_contents_has_crlf_on_windows(f) } + end + + def test_c_rb_io_fdopen_bin_mode_write_lf + open_file_with(:c_rb_io_fdopen, @write_path, 'wb') { |f| f.write TEST_STRING_WITH_LF } + File.open(@write_path, 'rb') { |f| assert_file_contents_has_lf(f) } + end + + def test_c_rb_io_fdopen_bin_mode_write_crlf + open_file_with(:c_rb_io_fdopen, @write_path, 'wb') { |f| f.write TEST_STRING_WITH_CRLF } + File.open(@write_path, 'rb') { |f| assert_file_contents_has_crlf(f) } + end + + def test_c_rb_io_fdopen_text_mode_read_crlf_with_utf8_encoding + open_file_with(:c_rb_io_fdopen, @read_path_with_crlf, 'r') do |f| + f.set_encoding Encoding::UTF_8, '-' + assert_file_contents_has_lf_on_windows(f) + end + end + + def test_c_rb_io_fdopen_bin_mode_read_crlf_with_utf8_encoding + 
open_file_with(:c_rb_io_fdopen, @read_path_with_crlf, 'rb') do |f| + f.set_encoding Encoding::UTF_8, '-' + assert_file_contents_has_crlf(f) + end + end + + def test_c_rb_io_fdopen_text_mode_read_lf_with_utf8_encoding + open_file_with(:c_rb_io_fdopen, @read_path_with_lf, 'r') do |f| + f.set_encoding Encoding::UTF_8, '-' + assert_file_contents_has_lf(f) + end + end + + def test_c_rb_io_fdopen_bin_mode_read_lf_with_utf8_encoding + open_file_with(:c_rb_io_fdopen, @read_path_with_lf, 'rb') do |f| + f.set_encoding Encoding::UTF_8, '-' + assert_file_contents_has_lf(f) + end + end + end end |