diff options
author | KJ Tsanaktsidis <[email protected]> | 2023-12-28 16:08:54 +1100 |
---|---|---|
committer | KJ Tsanaktsidis <[email protected]> | 2024-01-10 21:02:23 +1100 |
commit | 31371b2e24b03ccb0a03b622faf8c65e6cf6a31a (patch) | |
tree | e419cf38ee171aaf893dd45dcc80ebff5e051cd5 /io.c | |
parent | c4051d5f4324536a932e99cbe43f5b7dbe34254a (diff) |
Fix CRLF -> LF conversion on read for rb_io_fdopen & rb_file_open
When opening a file with `File.open`, and then setting the encoding with
`IO#set_encoding`, it still correctly performs CRLF -> LF conversion on
Windows when reading files with a CRLF line ending in them (in text
mode).
However, if the file is instead opened with either the `rb_io_fdopen` or
`rb_file_open` APIs from C, the CRLF conversion is _NOT_ set up
correctly; it works if the encoding is not specified, but if
`IO#set_encoding` is called, the conversion stops happening. This seems
to be because the encflags never get ECONV_DEFAULT_NEWLINE_DECORATOR
set in these codepaths.
Concretely, this means that the conversion doesn't happen in the
following circumstances:
* When loading ruby files with require (that calls rb_io_fdopen)
* When parsing ruby files with RubyVM::AbstractSyntaxTree (that calls
rb_file_open).
This then causes the ErrorHighlight tests to fail on Windows if git has
checked them out with CRLF line endings - the error messages it's
testing wind up with literal \r\n sequences in them because the iseq
text from the parser contains un-newline-converted strings.
This commit fixes the problem by copy-pasting the relevant snippet which
sets this up in `rb_io_extract_modeenc` (for the File.open path) into
the relevant codepaths for `rb_io_fdopen` and `rb_file_open`.
[Bug #20101]
Diffstat (limited to 'io.c')
-rw-r--r-- | io.c | 33 |
1 files changed, 28 insertions, 5 deletions
@@ -7166,8 +7166,6 @@ rb_file_open_internal(VALUE io, VALUE filename, const char *modestr) if (p) { parse_mode_enc(p+1, rb_usascii_encoding(), &convconfig.enc, &convconfig.enc2, &fmode); - convconfig.ecflags = 0; - convconfig.ecopts = Qnil; } else { rb_encoding *e; @@ -7175,10 +7173,19 @@ rb_file_open_internal(VALUE io, VALUE filename, const char *modestr) e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL; rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode); - convconfig.ecflags = 0; - convconfig.ecopts = Qnil; } + convconfig.ecflags = (fmode & FMODE_READABLE) ? + MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR, + 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0; +#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE + convconfig.ecflags |= (fmode & FMODE_WRITABLE) ? + MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, + 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0; +#endif + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(convconfig.enc2, convconfig.ecflags); + convconfig.ecopts = Qnil; + return rb_file_open_generic(io, filename, rb_io_fmode_oflags(fmode), fmode, @@ -9241,11 +9248,27 @@ static VALUE prep_io(int fd, int fmode, VALUE klass, const char *path) { VALUE path_value = Qnil; + rb_encoding *e; + struct rb_io_encoding convconfig; + if (path) { path_value = rb_obj_freeze(rb_str_new_cstr(path)); } - VALUE self = rb_io_open_descriptor(klass, fd, fmode, path_value, Qnil, NULL); + e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL; + rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode); + convconfig.ecflags = (fmode & FMODE_READABLE) ? + MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR, + 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0; +#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE + convconfig.ecflags |= (fmode & FMODE_WRITABLE) ? 
+ MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, + 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0; +#endif + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(convconfig.enc2, convconfig.ecflags); + convconfig.ecopts = Qnil; + + VALUE self = rb_io_open_descriptor(klass, fd, fmode, path_value, Qnil, &convconfig); rb_io_t*io = RFILE(self)->fptr; if (!io_check_tty(io)) { |