diff options
-rw-r--r-- | benchmark/string_concat.yml | 26 | ||||
-rw-r--r-- | string.c | 21 |
2 files changed, 31 insertions, 16 deletions
diff --git a/benchmark/string_concat.yml b/benchmark/string_concat.yml index 0ff1dc25b6..b8a69ed909 100644 --- a/benchmark/string_concat.yml +++ b/benchmark/string_concat.yml @@ -1,9 +1,9 @@ prelude: | CHUNK = "a" * 64 - BCHUNK = "a".b * 64 + UCHUNK = "é" * 32 GC.disable # GC causes a lot of variance benchmark: - binary_concat_utf8: | + binary_concat_7bit: | buffer = String.new(capacity: 4096, encoding: Encoding::BINARY) buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK @@ -13,17 +13,7 @@ benchmark: buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK - binary_concat_binary: | - buffer = String.new(capacity: 4096, encoding: Encoding::BINARY) - buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK - buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK - buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK - buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK - buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK - buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK - buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK - buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK - utf8_concat_utf8: | + utf8_concat_7bit: | buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8) buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK @@ -33,3 +23,13 @@ benchmark: buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK + utf8_concat_UTF8: | + buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8) + buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK + buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK + buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK + buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK + buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK + buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK + buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK + buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK @@ -3329,9 +3329,24 @@ VALUE rb_str_buf_append(VALUE str, VALUE str2) { int str2_cr = rb_enc_str_coderange(str2); - if (str2_cr == ENC_CODERANGE_7BIT && str_enc_fastpath(str)) { - str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true); - return str; + + if (str_enc_fastpath(str)) { + switch (str2_cr) { + case ENC_CODERANGE_7BIT: + // If RHS is 7bit we can do simple concatenation + str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true); + return str; + case ENC_CODERANGE_VALID: + // If RHS is valid, we can do simple concatenation if encodings are the same + if (ENCODING_GET_INLINED(str) == ENCODING_GET_INLINED(str2)) { + str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true); + int str_cr = ENC_CODERANGE(str); + if (UNLIKELY(str_cr != ENC_CODERANGE_VALID)) { + ENC_CODERANGE_SET(str, RB_ENC_CODERANGE_AND(str_cr, str2_cr)); + } + return str; + } + } } rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2), |