summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- benchmark/string_concat.yml 26
-rw-r--r-- string.c 21
2 files changed, 31 insertions, 16 deletions
diff --git a/benchmark/string_concat.yml b/benchmark/string_concat.yml
index 0ff1dc25b6..b8a69ed909 100644
--- a/benchmark/string_concat.yml
+++ b/benchmark/string_concat.yml
@@ -1,9 +1,9 @@
prelude: |
CHUNK = "a" * 64
- BCHUNK = "a".b * 64
+ UCHUNK = "é" * 32
GC.disable # GC causes a lot of variance
benchmark:
- binary_concat_utf8: |
+ binary_concat_7bit: |
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
@@ -13,17 +13,7 @@ benchmark:
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
- binary_concat_binary: |
- buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
- buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
- buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
- buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
- buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
- buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
- buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
- buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
- buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
- utf8_concat_utf8: |
+ utf8_concat_7bit: |
buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8)
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
@@ -33,3 +23,13 @@ benchmark:
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
+ utf8_concat_UTF8: |
+ buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8)
+ buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
+ buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
+ buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
+ buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
+ buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
+ buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
+ buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
+ buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
diff --git a/string.c b/string.c
index f3f5ea92cc..f5e089aa21 100644
--- a/string.c
+++ b/string.c
@@ -3329,9 +3329,24 @@ VALUE
rb_str_buf_append(VALUE str, VALUE str2)
{
int str2_cr = rb_enc_str_coderange(str2);
- if (str2_cr == ENC_CODERANGE_7BIT && str_enc_fastpath(str)) {
- str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true);
- return str;
+
+ if (str_enc_fastpath(str)) {
+ switch (str2_cr) {
+ case ENC_CODERANGE_7BIT:
+ // If RHS is 7bit we can do simple concatenation
+ str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true);
+ return str;
+ case ENC_CODERANGE_VALID:
+ // If RHS is valid, we can do simple concatenation if encodings are the same
+ if (ENCODING_GET_INLINED(str) == ENCODING_GET_INLINED(str2)) {
+ str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true);
+ int str_cr = ENC_CODERANGE(str);
+ if (UNLIKELY(str_cr != ENC_CODERANGE_VALID)) {
+ ENC_CODERANGE_SET(str, RB_ENC_CODERANGE_AND(str_cr, str2_cr));
+ }
+ return str;
+ }
+ }
}
rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),