diff options
author | Jeremy Evans <[email protected]> | 2024-01-30 17:15:44 -0800 |
---|---|---|
committer | Jeremy Evans <[email protected]> | 2024-02-11 22:48:38 -0800 |
commit | c20e819e8b04e84a4103ca9a036022543459c213 (patch) | |
tree | 32b354171e526dc60aff66dcae5cbbd2b0b6c8cd /vm_args.c | |
parent | 93accfdf48d38160a11c2f0f8ed978f4cdbfe3cd (diff) |
Fix crash when passing large keyword splat to method accepting keywords and keyword splat
The following code previously caused a crash:
```ruby
h = {}
1000000.times{|i| h[i.to_s.to_sym] = i}
def f(kw: 1, **kws) end
f(**h)
```
Inside a thread or fiber, the size of the keyword splat could be much smaller
and still cause a crash.
I found this issue while optimizing method calling by reducing implicit
allocations. Given the following code:
```ruby
def f(kw: , **kws) end
kw = {kw: 1}
f(**kw)
```
The `f(**kw)` call previously allocated two hashes callee side instead of a
single hash. This is because `setup_parameters_complex` would extract the
keywords from the keyword splat hash to the C stack, to attempt to mirror
the case when literal keywords are passed without a keyword splat. Then,
`make_rest_kw_hash` would build a new hash based on the extracted keywords
that weren't used for literal keywords.
Switch the implementation so that if a keyword splat is passed, literal keywords
are deleted from the keyword splat hash (or a copy of the hash if the hash is
not mutable).
In addition to avoiding the crash, this new approach is much more
efficient in all cases. With the included benchmark:
```
1
miniruby: 5247879.9 i/s
miniruby-before: 2474050.2 i/s - 2.12x slower
1_mutable
miniruby: 1797036.5 i/s
miniruby-before: 1239543.3 i/s - 1.45x slower
10
miniruby: 1094750.1 i/s
miniruby-before: 365529.6 i/s - 2.99x slower
10_mutable
miniruby: 407781.7 i/s
miniruby-before: 225364.0 i/s - 1.81x slower
100
miniruby: 100992.3 i/s
miniruby-before: 32703.6 i/s - 3.09x slower
100_mutable
miniruby: 40092.3 i/s
miniruby-before: 21266.9 i/s - 1.89x slower
1000
miniruby: 21694.2 i/s
miniruby-before: 4949.8 i/s - 4.38x slower
1000_mutable
miniruby: 5819.5 i/s
miniruby-before: 2995.0 i/s - 1.94x slower
```
Diffstat (limited to 'vm_args.c')
-rw-r--r-- | vm_args.c | 104 |
1 files changed, 79 insertions, 25 deletions
@@ -396,6 +396,83 @@ args_setup_kw_parameters(rb_execution_context_t *const ec, const rb_iseq_t *cons locals[key_num] = unspecified_bits_value; } +static void +args_setup_kw_parameters_from_kwsplat(rb_execution_context_t *const ec, const rb_iseq_t *const iseq, + VALUE keyword_hash, VALUE *const locals) +{ + const ID *acceptable_keywords = ISEQ_BODY(iseq)->param.keyword->table; + const int req_key_num = ISEQ_BODY(iseq)->param.keyword->required_num; + const int key_num = ISEQ_BODY(iseq)->param.keyword->num; + const VALUE * const default_values = ISEQ_BODY(iseq)->param.keyword->default_values; + VALUE missing = 0; + int i, di; + int unspecified_bits = 0; + VALUE unspecified_bits_value = Qnil; + + for (i=0; i<req_key_num; i++) { + VALUE key = ID2SYM(acceptable_keywords[i]); + VALUE deleted_value = rb_hash_delete_entry(keyword_hash, key); + if (!UNDEF_P(deleted_value)) { + locals[i] = deleted_value; + } + else { + if (!missing) missing = rb_ary_hidden_new(1); + rb_ary_push(missing, key); + } + } + + if (missing) argument_kw_error(ec, iseq, "missing", missing); + + for (di=0; i<key_num; i++, di++) { + VALUE key = ID2SYM(acceptable_keywords[i]); + VALUE deleted_value = rb_hash_delete_entry(keyword_hash, key); + if (!UNDEF_P(deleted_value)) { + locals[i] = deleted_value; + } + else { + if (UNDEF_P(default_values[di])) { + locals[i] = Qnil; + + if (LIKELY(i < KW_SPECIFIED_BITS_MAX)) { + unspecified_bits |= 0x01 << di; + } + else { + if (NIL_P(unspecified_bits_value)) { + /* fixnum -> hash */ + int j; + unspecified_bits_value = rb_hash_new(); + + for (j=0; j<KW_SPECIFIED_BITS_MAX; j++) { + if (unspecified_bits & (0x01 << j)) { + rb_hash_aset(unspecified_bits_value, INT2FIX(j), Qtrue); + } + } + } + rb_hash_aset(unspecified_bits_value, INT2FIX(di), Qtrue); + } + } + else { + locals[i] = default_values[di]; + } + } + } + + if (ISEQ_BODY(iseq)->param.flags.has_kwrest) { + const int rest_hash_index = key_num + 1; + locals[rest_hash_index] = keyword_hash; + } + else { + if (!RHASH_EMPTY_P(keyword_hash)) { + argument_kw_error(ec, iseq, "unknown", rb_hash_keys(keyword_hash)); + } + } + + if (NIL_P(unspecified_bits_value)) { + unspecified_bits_value = INT2FIX(unspecified_bits); + } + locals[key_num] = unspecified_bits_value; +} + static inline void args_setup_kw_rest_parameter(VALUE keyword_hash, VALUE *locals, int kw_flag) { @@ -415,22 +492,6 @@ args_setup_block_parameter(const rb_execution_context_t *ec, struct rb_calling_i *locals = rb_vm_bh_to_procval(ec, block_handler); } -struct fill_values_arg { - VALUE *keys; - VALUE *vals; - int argc; -}; - -static int -fill_keys_values(st_data_t key, st_data_t val, st_data_t ptr) -{ - struct fill_values_arg *arg = (struct fill_values_arg *)ptr; - int i = arg->argc++; - arg->keys[i] = (VALUE)key; - arg->vals[i] = (VALUE)val; - return ST_CONTINUE; -} - static inline int ignore_keyword_hash_p(VALUE keyword_hash, const rb_iseq_t * const iseq, unsigned int * kw_flag, VALUE * converted_keyword_hash) { @@ -746,15 +807,8 @@ setup_parameters_complex(rb_execution_context_t * const ec, const rb_iseq_t * co args_setup_kw_parameters(ec, iseq, args->kw_argv, kw_arg->keyword_len, kw_arg->keywords, klocals); } else if (!NIL_P(keyword_hash)) { - int kw_len = rb_long2int(RHASH_SIZE(keyword_hash)); - struct fill_values_arg arg; - /* copy kw_argv */ - arg.keys = args->kw_argv = ALLOCA_N(VALUE, kw_len * 2); - arg.vals = arg.keys + kw_len; - arg.argc = 0; - rb_hash_foreach(keyword_hash, fill_keys_values, (VALUE)&arg); - VM_ASSERT(arg.argc == kw_len); - args_setup_kw_parameters(ec, iseq, arg.vals, kw_len, arg.keys, klocals); + keyword_hash = check_kwrestarg(keyword_hash, &kw_flag); + args_setup_kw_parameters_from_kwsplat(ec, iseq, keyword_hash, klocals); } else { VM_ASSERT(args_argc(args) == 0); |