From 31371b2e24b03ccb0a03b622faf8c65e6cf6a31a Mon Sep 17 00:00:00 2001 From: KJ Tsanaktsidis Date: Thu, 28 Dec 2023 16:08:54 +1100 Subject: Fix CRLF -> LF conversion on read for rb_io_fdopen & rb_file_open When opening a file with `File.open`, and then setting the encoding with `IO#set_encoding`, it still correctly performs CRLF -> LF conversion on Windows when reading files with a CRLF line ending in them (in text mode). However, the file is opened instead with either the `rb_io_fdopen` or `rb_file_open` APIs from C, the CRLF conversion is _NOT_ set up correctly; it works if the encoding is not specified, but if `IO#set_encoding` is called, the conversion stops happening. This seems to be because the encflags never get ECONV_DEFAULT_NEWLINE_DECORATOR set in these codepaths. Concretely, this means that the conversion doesn't happen in the following circumstances: * When loading ruby files with require (that calls rb_io_fdopen) * When parsing ruuby files with RubyVM::AbstractSyntaxTree (that calls rb_file_open). This then causes the ErrorHighlight tests to fail on windows if git has checked them out with CRLF line endings - the error messages it's testing wind up with literal \r\n sequences in them because the iseq text from the parser contains un-newline-converted strings. This commit fixes the problem by copy-pasting the relevant snippet which sets this up in `rb_io_extract_modeenc` (for the File.open path) into the relevant codepaths for `rb_io_fdopen` and `rb_file_open`. [Bug #20101] --- io.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) (limited to 'io.c') diff --git a/io.c b/io.c index f6cd2c1a56..90bf245071 100644 --- a/io.c +++ b/io.c @@ -7166,8 +7166,6 @@ rb_file_open_internal(VALUE io, VALUE filename, const char *modestr) if (p) { parse_mode_enc(p+1, rb_usascii_encoding(), &convconfig.enc, &convconfig.enc2, &fmode); - convconfig.ecflags = 0; - convconfig.ecopts = Qnil; } else { rb_encoding *e; @@ -7175,10 +7173,19 @@ rb_file_open_internal(VALUE io, VALUE filename, const char *modestr) e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL; rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode); - convconfig.ecflags = 0; - convconfig.ecopts = Qnil; } + convconfig.ecflags = (fmode & FMODE_READABLE) ? + MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR, + 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0; +#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE + convconfig.ecflags |= (fmode & FMODE_WRITABLE) ? + MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, + 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0; +#endif + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(convconfig.enc2, convconfig.ecflags); + convconfig.ecopts = Qnil; + return rb_file_open_generic(io, filename, rb_io_fmode_oflags(fmode), fmode, @@ -9241,11 +9248,27 @@ static VALUE prep_io(int fd, int fmode, VALUE klass, const char *path) { VALUE path_value = Qnil; + rb_encoding *e; + struct rb_io_encoding convconfig; + if (path) { path_value = rb_obj_freeze(rb_str_new_cstr(path)); } - VALUE self = rb_io_open_descriptor(klass, fd, fmode, path_value, Qnil, NULL); + e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL; + rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode); + convconfig.ecflags = (fmode & FMODE_READABLE) ? + MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR, + 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0; +#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE + convconfig.ecflags |= (fmode & FMODE_WRITABLE) ? + MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, + 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0; +#endif + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(convconfig.enc2, convconfig.ecflags); + convconfig.ecopts = Qnil; + + VALUE self = rb_io_open_descriptor(klass, fd, fmode, path_value, Qnil, &convconfig); rb_io_t*io = RFILE(self)->fptr; if (!io_check_tty(io)) { -- cgit v1.2.3