summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean Boussier <[email protected]>2023-11-02 11:02:43 +0100
committerJean Boussier <[email protected]>2023-11-02 23:34:58 +0100
commitac8ec004e5272d589caca30616dbe12862150188 (patch)
treea3fb421e8ad6ddf400130b81fe33805c7c352fab
parentee7bf4643d2716d44e13a732caf588b1a6275f7a (diff)
Make String.new size pools aware.
If the required capacity would fit in an embded string, returns one. This can reduce malloc churn for code that use string buffers.
-rw-r--r--string.c93
-rw-r--r--test/-ext-/string/test_capacity.rb2
2 files changed, 94 insertions, 1 deletions
diff --git a/string.c b/string.c
index 9f4a345726..13b63f023a 100644
--- a/string.c
+++ b/string.c
@@ -1874,6 +1874,98 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
return str;
}
+/* :nodoc: */
+static VALUE
+rb_str_s_new(int argc, VALUE *argv, VALUE klass)
+{
+ if (klass != rb_cString) {
+ return rb_class_new_instance_pass_kw(argc, argv, klass);
+ }
+
+ static ID keyword_ids[2];
+ VALUE orig, opt, encoding = Qnil, capacity = Qnil;
+ VALUE kwargs[2];
+ rb_encoding *enc = NULL;
+
+ int n = rb_scan_args(argc, argv, "01:", &orig, &opt);
+ if (NIL_P(opt)) {
+ return rb_class_new_instance_pass_kw(argc, argv, klass);
+ }
+
+ keyword_ids[0] = rb_id_encoding();
+ CONST_ID(keyword_ids[1], "capacity");
+ rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);
+ encoding = kwargs[0];
+ capacity = kwargs[1];
+
+ int termlen = 1;
+
+ if (n == 1) {
+ orig = StringValue(orig);
+ }
+ else {
+ orig = Qnil;
+ }
+
+ if (UNDEF_P(encoding)) {
+ if (!NIL_P(orig)) {
+ encoding = rb_obj_encoding(orig);
+ }
+ }
+
+ if (!UNDEF_P(encoding)) {
+ enc = rb_to_encoding(encoding);
+ termlen = rb_enc_mbminlen(enc);
+ }
+
+ // If capacity is nil, we're basically just duping `orig`.
+ if (UNDEF_P(capacity)) {
+ if (NIL_P(orig)) {
+ VALUE empty_str = str_new(klass, "", 0);
+ if (enc) {
+ rb_enc_associate(empty_str, enc);
+ }
+ return empty_str;
+ }
+ VALUE copy = str_duplicate(klass, orig);
+ rb_enc_associate(copy, enc);
+ ENC_CODERANGE_CLEAR(copy);
+ return copy;
+ }
+
+ long capa = 0;
+ capa = NUM2LONG(capacity);
+ if (capa < 0) {
+ capa = 0;
+ }
+
+ if (!NIL_P(orig)) {
+ long orig_capa = rb_str_capacity(orig);
+ if (orig_capa > capa) {
+ capa = orig_capa;
+ }
+ }
+
+ long fake_len = capa - termlen;
+ if (fake_len < 0) {
+ fake_len = 0;
+ }
+
+ VALUE str = str_new0(klass, NULL, fake_len, termlen);
+ STR_SET_LEN(str, 0);
+ TERM_FILL(RSTRING_PTR(str), termlen);
+
+ if (enc) {
+ rb_enc_associate(str, enc);
+ }
+
+ if (!NIL_P(orig)) {
+ rb_str_buf_append(str, orig);
+ }
+
+ return str;
+}
+
#ifdef NONASCII_MASK
#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
@@ -11931,6 +12023,7 @@ Init_String(void)
st_foreach(rb_vm_fstring_table(), fstring_set_class_i, rb_cString);
rb_include_module(rb_cString, rb_mComparable);
rb_define_alloc_func(rb_cString, empty_str_alloc);
+ rb_define_singleton_method(rb_cString, "new", rb_str_s_new, -1);
rb_define_singleton_method(rb_cString, "try_convert", rb_str_s_try_convert, 1);
rb_define_method(rb_cString, "initialize", rb_str_init, -1);
rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1);
diff --git a/test/-ext-/string/test_capacity.rb b/test/-ext-/string/test_capacity.rb
index 71f91918e7..2c6c51fdda 100644
--- a/test/-ext-/string/test_capacity.rb
+++ b/test/-ext-/string/test_capacity.rb
@@ -23,7 +23,7 @@ class Test_StringCapacity < Test::Unit::TestCase
def test_s_new_capacity
assert_equal("", String.new(capacity: 1000))
assert_equal(String, String.new(capacity: 1000).class)
- assert_equal(10000, capa(String.new(capacity: 10000)))
+ assert_equal(10_000 - 1, capa(String.new(capacity: 10_000))) # Real capa doesn't account for termlen
assert_equal("", String.new(capacity: -1000))
assert_equal(capa(String.new(capacity: -10000)), capa(String.new(capacity: -1000)))