diff options
author | Your Name <[email protected]> | 2024-08-03 00:53:13 +0000 |
---|---|---|
committer | Alan Wu <[email protected]> | 2024-08-07 18:49:20 -0400 |
commit | 34715bdd910698e1aa9770b36c2b6e38e708c629 (patch) | |
tree | d345af17dbb2ec3d564eb26cdb0cd4a26ba330eb /vm.c | |
parent | e271feb8663415d9ed8a55e0e78bd655a16e0201 (diff) |
Tune codegen for rb_yield() calls landing in ISeqs
Unlike in older revisions in the year, GCC 11 isn't inlining the call
to vm_push_frame() inside invoke_iseq_block_from_c() anymore. We do
want it to be inlined since rb_yield() speed is fairly important.
Logs from -fopt-info-optimized-inline reveal that GCC was blowing its
code size budget inlining invoke_block_from_c_bh() into its various
callers, leaving suboptimal code for its body.
Take away some uses of the `inline` keyword and merge a common tail
call to vm_exec() for overall better code.
This tweak gives about 18% on a micro benchmark and 1% on the
chunky-png benchmark from yjit-bench. I tested on a Skylake server.
```
$ cat c-to-ruby-call.yml
benchmark:
- 0.upto(10_000_000) {}
$ benchmark-driver --chruby '+patch;master' c-to-ruby-call.yml
Warming up --------------------------------------
0.upto(10_000_000) {} 2.299 i/s - 3.000 times in 1.304689s (434.90ms/i)
Calculating -------------------------------------
+patch master
0.upto(10_000_000) {} 2.299 1.943 i/s - 6.000 times in 2.609393s 3.088353s
Comparison:
0.upto(10_000_000) {}
+patch: 2.3 i/s
master: 1.9 i/s - 1.18x slower
$ ruby run_benchmarks.rb --chruby 'master;+patch' chunky-png
<snip>
---------- ----------- ---------- ----------- ---------- -------------- -------------
bench master (ms) stddev (%) +patch (ms) stddev (%) +patch 1st itr master/+patch
chunky-png 1156.1 0.1 1142.2 0.2 1.01 1.01
---------- ----------- ---------- ----------- ---------- -------------- -------------
```
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/11321
Diffstat (limited to 'vm.c')
-rw-r--r-- | vm.c | 17 |
1 files changed, 7 insertions, 10 deletions
@@ -1509,7 +1509,7 @@ rb_binding_add_dynavars(VALUE bindval, rb_binding_t *bind, int dyncount, const I /* C -> Ruby: block */ -static inline VALUE +static inline void invoke_block(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, const struct rb_captured_block *captured, const rb_cref_t *cref, VALUE type, int opt_pc) { int arg_size = ISEQ_BODY(iseq)->param.size; @@ -1521,15 +1521,13 @@ invoke_block(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, cons ec->cfp->sp + arg_size, ISEQ_BODY(iseq)->local_table_size - arg_size, ISEQ_BODY(iseq)->stack_max); - return vm_exec(ec); } -static VALUE +static inline void invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, const struct rb_captured_block *captured, const rb_callable_method_entry_t *me, VALUE type, int opt_pc) { /* bmethod call from outside the VM */ int arg_size = ISEQ_BODY(iseq)->param.size; - VALUE ret; VM_ASSERT(me->def->type == VM_METHOD_TYPE_BMETHOD); @@ -1542,9 +1540,6 @@ invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, co ISEQ_BODY(iseq)->stack_max); VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH); - ret = vm_exec(ec); - - return ret; } ALWAYS_INLINE(static VALUE @@ -1591,14 +1586,16 @@ invoke_iseq_block_from_c(rb_execution_context_t *ec, const struct rb_captured_bl cfp->sp = sp; if (me == NULL) { - return invoke_block(ec, iseq, self, captured, cref, type, opt_pc); + invoke_block(ec, iseq, self, captured, cref, type, opt_pc); } else { - return invoke_bmethod(ec, iseq, self, captured, me, type, opt_pc); + invoke_bmethod(ec, iseq, self, captured, me, type, opt_pc); } + + return vm_exec(ec); } -static inline VALUE +static VALUE invoke_block_from_c_bh(rb_execution_context_t *ec, VALUE block_handler, int argc, const VALUE *argv, int kw_splat, VALUE passed_block_handler, const rb_cref_t *cref, |