diff options
author | Nobuyoshi Nakada <[email protected]> | 2023-09-26 13:25:44 +0900 |
---|---|---|
committer | Nobuyoshi Nakada <[email protected]> | 2023-09-26 15:35:40 +0900 |
commit | 6b66b5fdedb2c9a9ee48e290d57ca7f8d55e01a2 (patch) | |
tree | a534e2aacfc78f270444742f4837ea629b281d27 | |
parent | f0d827860783afd34e12450dd310e50917d396e3 (diff) |
[Bug #19902] Update the coderange regarding the changed region
-rw-r--r-- | ext/-test-/string/set_len.c | 10 | ||||
-rw-r--r-- | string.c | 27 | ||||
-rw-r--r-- | test/-ext-/string/test_set_len.rb | 29 |
3 files changed, 66 insertions, 0 deletions
diff --git a/ext/-test-/string/set_len.c b/ext/-test-/string/set_len.c
index 219cea404c..049da2cdb5 100644
--- a/ext/-test-/string/set_len.c
+++ b/ext/-test-/string/set_len.c
@@ -7,8 +7,18 @@ bug_str_set_len(VALUE str, VALUE len)
     return str;
 }
 
+static VALUE
+bug_str_append(VALUE str, VALUE addendum)
+{
+    StringValue(addendum);
+    rb_str_modify_expand(str, RSTRING_LEN(addendum));
+    memcpy(RSTRING_END(str), RSTRING_PTR(addendum), RSTRING_LEN(addendum));
+    return str;
+}
+
 void
 Init_string_set_len(VALUE klass)
 {
     rb_define_method(klass, "set_len", bug_str_set_len, 1);
+    rb_define_method(klass, "append", bug_str_append, 1);
 }
diff --git a/string.c b/string.c
--- a/string.c
+++ b/string.c
@@ -2985,6 +2985,33 @@ rb_str_set_len(VALUE str, long len)
     if (len > (capa = (long)str_capacity(str, termlen)) || len < 0) {
         rb_bug("probable buffer overflow: %ld for %ld", len, capa);
     }
+
+    int cr = ENC_CODERANGE(str);
+    if (cr == ENC_CODERANGE_UNKNOWN) {
+        /* Leave unknown. */
+    }
+    else if (len > RSTRING_LEN(str)) {
+        if (ENC_CODERANGE_CLEAN_P(cr)) {
+            /* Update the coderange regarding the extended part. */
+            const char *const prev_end = RSTRING_END(str);
+            const char *const new_end = RSTRING_PTR(str) + len;
+            rb_encoding *enc = rb_enc_get(str);
+            rb_str_coderange_scan_restartable(prev_end, new_end, enc, &cr);
+            ENC_CODERANGE_SET(str, cr);
+        }
+        else if (cr == ENC_CODERANGE_BROKEN) {
+            /* May be valid now, by appended part. */
+            ENC_CODERANGE_SET(str, ENC_CODERANGE_UNKNOWN);
+        }
+    }
+    else if (len < RSTRING_LEN(str)) {
+        if (cr != ENC_CODERANGE_7BIT) {
+            /* ASCII-only string is keeping after truncated. Valid
+             * and broken may be invalid or valid, leave unknown. */
+            ENC_CODERANGE_SET(str, ENC_CODERANGE_UNKNOWN);
+        }
+    }
+
     STR_SET_LEN(str, len);
     TERM_FILL(&RSTRING_PTR(str)[len], termlen);
 }
diff --git a/test/-ext-/string/test_set_len.rb b/test/-ext-/string/test_set_len.rb
index 67ba961194..e3eff75d9b 100644
--- a/test/-ext-/string/test_set_len.rb
+++ b/test/-ext-/string/test_set_len.rb
@@ -34,4 +34,33 @@ class Test_StrSetLen < Test::Unit::TestCase
     assert_equal 128, Bug::String.capacity(str)
     assert_equal 127, str.set_len(127).bytesize, bug12757
   end
+
+  def test_coderange_after_append
+    u = -"\u3042"
+    str = Bug::String.new(encoding: Encoding::UTF_8)
+    bsize = u.bytesize
+    str.append(u)
+    assert_equal 0, str.bytesize
+    str.set_len(bsize)
+    assert_equal bsize, str.bytesize
+    assert_predicate str, :valid_encoding?
+    assert_not_predicate str, :ascii_only?
+    assert_equal u, str
+  end
+
+  def test_coderange_after_trunc
+    u = -"\u3042"
+    bsize = u.bytesize
+    str = Bug::String.new(u)
+    str.set_len(bsize - 1)
+    assert_equal bsize - 1, str.bytesize
+    assert_not_predicate str, :valid_encoding?
+    assert_not_predicate str, :ascii_only?
+    str.append(u.byteslice(-1))
+    str.set_len(bsize)
+    assert_equal bsize, str.bytesize
+    assert_predicate str, :valid_encoding?
+    assert_not_predicate str, :ascii_only?
+    assert_equal u, str
+  end
 end