diff options
author | Peter Zhu <[email protected]> | 2023-01-11 14:25:34 -0500 |
---|---|---|
committer | Peter Zhu <[email protected]> | 2023-01-12 09:06:15 -0500 |
commit | bfc887f391fde6de9d088039509f6e3eaa40b3ca (patch) | |
tree | 9e5604354b561dbb8ce350aac40bb6c36e235148 /string.c | |
parent | 207f8d0027d679780d4f3962c305d36885feb652 (diff) |
Add str_enc_copy_direct
This commit adds str_enc_copy_direct, which is like str_enc_copy but
does not check the frozen status of str1 and does not check the validity
of the encoding of str2. This makes certain string operations ~5% faster.
```ruby
puts(Benchmark.measure do
  100_000_000.times do
    "a".downcase
  end
end)
```
Before this patch:
```
7.587598 0.040858 7.628456 ( 7.669022)
```
After this patch:
```
7.133128 0.039809 7.172937 ( 7.183124)
```
Notes:
Merged: https://github.com/ruby/ruby/pull/7106
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 24 |
1 files changed, 19 insertions, 5 deletions
```diff
@@ -768,6 +768,20 @@ str_enc_copy(VALUE str1, VALUE str2)
     rb_enc_set_index(str1, ENCODING_GET(str2));
 }
 
+/* Like str_enc_copy, but does not check frozen status of str1.
+ * You should use this only if you're certain that str1 is not frozen. */
+static inline void
+str_enc_copy_direct(VALUE str1, VALUE str2)
+{
+    int inlined_encoding = RB_ENCODING_GET_INLINED(str2);
+    if (inlined_encoding == ENCODING_INLINE_MAX) {
+        rb_enc_set_index(str1, rb_enc_get_index(str2));
+    }
+    else {
+        ENCODING_SET_INLINED(str1, inlined_encoding);
+    }
+}
+
 static void
 rb_enc_cr_str_copy_for_substr(VALUE dest, VALUE src)
 {
@@ -6156,7 +6170,7 @@ str_byte_substr(VALUE str, long beg, long len, int empty)
 
     VALUE str2 = str_subseq(str, beg, len);
 
-    str_enc_copy(str2, str);
+    str_enc_copy_direct(str2, str);
 
     if (RSTRING_LEN(str2) == 0) {
         if (!rb_enc_asciicompat(STR_ENC_GET(str)))
@@ -6382,7 +6396,7 @@ rb_str_reverse(VALUE str)
         }
     }
     STR_SET_LEN(rev, RSTRING_LEN(str));
-    str_enc_copy(rev, str);
+    str_enc_copy_direct(rev, str);
     ENC_CODERANGE_SET(rev, cr);
 
     return rev;
@@ -7337,7 +7351,7 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
     RB_GC_GUARD(buffer_anchor);
 
     /* TODO: check about string terminator character */
-    str_enc_copy(target, source);
+    str_enc_copy_direct(target, source);
     /*ENC_CODERANGE_SET(mapped, cr);*/
 
     return target;
@@ -7468,7 +7482,7 @@ rb_str_upcase(int argc, VALUE *argv, VALUE str)
     enc = str_true_enc(str);
     if (case_option_single_p(flags, enc, str)) {
         ret = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
-        str_enc_copy(ret, str);
+        str_enc_copy_direct(ret, str);
         upcase_single(ret);
     }
     else if (flags&ONIGENC_CASE_ASCII_ONLY) {
@@ -7570,7 +7584,7 @@ rb_str_downcase(int argc, VALUE *argv, VALUE str)
     enc = str_true_enc(str);
     if (case_option_single_p(flags, enc, str)) {
         ret = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
-        str_enc_copy(ret, str);
+        str_enc_copy_direct(ret, str);
         downcase_single(ret);
     }
     else if (flags&ONIGENC_CASE_ASCII_ONLY) {
```