summaryrefslogtreecommitdiff
path: root/io.c
diff options
context:
space:
mode:
authorKJ Tsanaktsidis <[email protected]>2023-12-28 16:08:54 +1100
committerKJ Tsanaktsidis <[email protected]>2024-01-10 21:02:23 +1100
commit31371b2e24b03ccb0a03b622faf8c65e6cf6a31a (patch)
treee419cf38ee171aaf893dd45dcc80ebff5e051cd5 /io.c
parentc4051d5f4324536a932e99cbe43f5b7dbe34254a (diff)
Fix CRLF -> LF conversion on read for rb_io_fdopen & rb_file_open
When opening a file with `File.open`, and then setting the encoding with `IO#set_encoding`, it still correctly performs CRLF -> LF conversion on Windows when reading files with a CRLF line ending in them (in text mode). However, if the file is opened instead with either the `rb_io_fdopen` or `rb_file_open` APIs from C, the CRLF conversion is _NOT_ set up correctly; it works if the encoding is not specified, but if `IO#set_encoding` is called, the conversion stops happening. This seems to be because the encflags never get ECONV_DEFAULT_NEWLINE_DECORATOR set in these codepaths. Concretely, this means that the conversion doesn't happen in the following circumstances: * When loading Ruby files with require (that calls rb_io_fdopen) * When parsing Ruby files with RubyVM::AbstractSyntaxTree (that calls rb_file_open). This then causes the ErrorHighlight tests to fail on Windows if git has checked them out with CRLF line endings - the error messages it's testing wind up with literal \r\n sequences in them because the iseq text from the parser contains un-newline-converted strings. This commit fixes the problem by copy-pasting the relevant snippet which sets this up in `rb_io_extract_modeenc` (for the File.open path) into the relevant codepaths for `rb_io_fdopen` and `rb_file_open`. [Bug #20101]
Diffstat (limited to 'io.c')
-rw-r--r--io.c33
1 files changed, 28 insertions, 5 deletions
diff --git a/io.c b/io.c
index f6cd2c1a56..90bf245071 100644
--- a/io.c
+++ b/io.c
@@ -7166,8 +7166,6 @@ rb_file_open_internal(VALUE io, VALUE filename, const char *modestr)
if (p) {
parse_mode_enc(p+1, rb_usascii_encoding(),
&convconfig.enc, &convconfig.enc2, &fmode);
- convconfig.ecflags = 0;
- convconfig.ecopts = Qnil;
}
else {
rb_encoding *e;
@@ -7175,10 +7173,19 @@ rb_file_open_internal(VALUE io, VALUE filename, const char *modestr)
e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL;
rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode);
- convconfig.ecflags = 0;
- convconfig.ecopts = Qnil;
}
+ convconfig.ecflags = (fmode & FMODE_READABLE) ?
+ MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR,
+ 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0;
+#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE
+ convconfig.ecflags |= (fmode & FMODE_WRITABLE) ?
+ MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE,
+ 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0;
+#endif
+ SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(convconfig.enc2, convconfig.ecflags);
+ convconfig.ecopts = Qnil;
+
return rb_file_open_generic(io, filename,
rb_io_fmode_oflags(fmode),
fmode,
@@ -9241,11 +9248,27 @@ static VALUE
prep_io(int fd, int fmode, VALUE klass, const char *path)
{
VALUE path_value = Qnil;
+ rb_encoding *e;
+ struct rb_io_encoding convconfig;
+
if (path) {
path_value = rb_obj_freeze(rb_str_new_cstr(path));
}
- VALUE self = rb_io_open_descriptor(klass, fd, fmode, path_value, Qnil, NULL);
+ e = (fmode & FMODE_BINMODE) ? rb_ascii8bit_encoding() : NULL;
+ rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2, fmode);
+ convconfig.ecflags = (fmode & FMODE_READABLE) ?
+ MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR,
+ 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0;
+#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE
+ convconfig.ecflags |= (fmode & FMODE_WRITABLE) ?
+ MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE,
+ 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0;
+#endif
+ SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(convconfig.enc2, convconfig.ecflags);
+ convconfig.ecopts = Qnil;
+
+ VALUE self = rb_io_open_descriptor(klass, fd, fmode, path_value, Qnil, &convconfig);
rb_io_t*io = RFILE(self)->fptr;
if (!io_check_tty(io)) {