diff options
author | Jean Boussier <[email protected]> | 2023-11-02 11:02:43 +0100 |
---|---|---|
committer | Jean Boussier <[email protected]> | 2023-11-02 23:34:58 +0100 |
commit | ac8ec004e5272d589caca30616dbe12862150188 (patch) | |
tree | a3fb421e8ad6ddf400130b81fe33805c7c352fab | |
parent | ee7bf4643d2716d44e13a732caf588b1a6275f7a (diff) |
Make String.new size pools aware.
If the required capacity would fit in an embedded string,
return one.
This can reduce malloc churn for code that uses string buffers.
-rw-r--r-- | string.c | 93 | ||||
-rw-r--r-- | test/-ext-/string/test_capacity.rb | 2 |
2 files changed, 94 insertions, 1 deletions
@@ -1874,6 +1874,98 @@ rb_str_init(int argc, VALUE *argv, VALUE str) return str; } +/* :nodoc: */ +static VALUE +rb_str_s_new(int argc, VALUE *argv, VALUE klass) +{ + if (klass != rb_cString) { + return rb_class_new_instance_pass_kw(argc, argv, klass); + } + + static ID keyword_ids[2]; + VALUE orig, opt, encoding = Qnil, capacity = Qnil; + VALUE kwargs[2]; + rb_encoding *enc = NULL; + + int n = rb_scan_args(argc, argv, "01:", &orig, &opt); + if (NIL_P(opt)) { + return rb_class_new_instance_pass_kw(argc, argv, klass); + } + + keyword_ids[0] = rb_id_encoding(); + CONST_ID(keyword_ids[1], "capacity"); + rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs); + encoding = kwargs[0]; + capacity = kwargs[1]; + + int termlen = 1; + + if (n == 1) { + orig = StringValue(orig); + } + else { + orig = Qnil; + } + + if (UNDEF_P(encoding)) { + if (!NIL_P(orig)) { + encoding = rb_obj_encoding(orig); + } + } + + if (!UNDEF_P(encoding)) { + enc = rb_to_encoding(encoding); + termlen = rb_enc_mbminlen(enc); + } + + // If capacity is nil, we're basically just duping `orig`. 
+ if (UNDEF_P(capacity)) { + if (NIL_P(orig)) { + VALUE empty_str = str_new(klass, "", 0); + if (enc) { + rb_enc_associate(empty_str, enc); + } + return empty_str; + } + VALUE copy = str_duplicate(klass, orig); + rb_enc_associate(copy, enc); + ENC_CODERANGE_CLEAR(copy); + return copy; + } + + long capa = 0; + capa = NUM2LONG(capacity); + if (capa < 0) { + capa = 0; + } + + if (!NIL_P(orig)) { + long orig_capa = rb_str_capacity(orig); + if (orig_capa > capa) { + capa = orig_capa; + } + } + + long fake_len = capa - termlen; + if (fake_len < 0) { + fake_len = 0; + } + + VALUE str = str_new0(klass, NULL, fake_len, termlen); + STR_SET_LEN(str, 0); + TERM_FILL(RSTRING_PTR(str), termlen); + + if (enc) { + rb_enc_associate(str, enc); + } + + if (!NIL_P(orig)) { + rb_str_buf_append(str, orig); + } + + return str; +} + #ifdef NONASCII_MASK #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80) @@ -11931,6 +12023,7 @@ Init_String(void) st_foreach(rb_vm_fstring_table(), fstring_set_class_i, rb_cString); rb_include_module(rb_cString, rb_mComparable); rb_define_alloc_func(rb_cString, empty_str_alloc); + rb_define_singleton_method(rb_cString, "new", rb_str_s_new, -1); rb_define_singleton_method(rb_cString, "try_convert", rb_str_s_try_convert, 1); rb_define_method(rb_cString, "initialize", rb_str_init, -1); rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1); diff --git a/test/-ext-/string/test_capacity.rb b/test/-ext-/string/test_capacity.rb index 71f91918e7..2c6c51fdda 100644 --- a/test/-ext-/string/test_capacity.rb +++ b/test/-ext-/string/test_capacity.rb @@ -23,7 +23,7 @@ class Test_StringCapacity < Test::Unit::TestCase def test_s_new_capacity assert_equal("", String.new(capacity: 1000)) assert_equal(String, String.new(capacity: 1000).class) - assert_equal(10000, capa(String.new(capacity: 10000))) + assert_equal(10_000 - 1, capa(String.new(capacity: 10_000))) # Real capa doesn't account for termlen assert_equal("", String.new(capacity: -1000)) 
assert_equal(capa(String.new(capacity: -10000)), capa(String.new(capacity: -1000))) |