summaryrefslogtreecommitdiff
path: root/vm.c
diff options
context:
space:
mode:
authorYour Name <[email protected]>2024-08-03 00:53:13 +0000
committerAlan Wu <[email protected]>2024-08-07 18:49:20 -0400
commit34715bdd910698e1aa9770b36c2b6e38e708c629 (patch)
treed345af17dbb2ec3d564eb26cdb0cd4a26ba330eb /vm.c
parente271feb8663415d9ed8a55e0e78bd655a16e0201 (diff)
Tune codegen for rb_yield() calls landing in ISeqs
Unlike in older revisions in the year, GCC 11 isn't inlining the call to vm_push_frame() inside invoke_iseq_block_from_c() anymore. We do want it to be inlined since rb_yield() speed is fairly important. Logs from -fopt-info-optimized-inline reveal that GCC was blowing its code size budget inlining invoke_block_from_c_bh() into its various callers, leaving suboptimal code for its body. Take away some uses of the `inline` keyword and merge a common tail call to vm_exec() for overall better code. This tweak gives about 18% on a micro benchmark and 1% on the chunky-png benchmark from yjit-bench. I tested on a Skylake server. ``` $ cat c-to-ruby-call.yml benchmark: - 0.upto(10_000_000) {} $ benchmark-driver --chruby '+patch;master' c-to-ruby-call.yml Warming up -------------------------------------- 0.upto(10_000_000) {} 2.299 i/s - 3.000 times in 1.304689s (434.90ms/i) Calculating ------------------------------------- +patch master 0.upto(10_000_000) {} 2.299 1.943 i/s - 6.000 times in 2.609393s 3.088353s Comparison: 0.upto(10_000_000) {} +patch: 2.3 i/s master: 1.9 i/s - 1.18x slower $ ruby run_benchmarks.rb --chruby 'master;+patch' chunky-png <snip> ---------- ----------- ---------- ----------- ---------- -------------- ------------- bench master (ms) stddev (%) +patch (ms) stddev (%) +patch 1st itr master/+patch chunky-png 1156.1 0.1 1142.2 0.2 1.01 1.01 ---------- ----------- ---------- ----------- ---------- -------------- ------------- ```
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/11321
Diffstat (limited to 'vm.c')
-rw-r--r--vm.c17
1 files changed, 7 insertions, 10 deletions
diff --git a/vm.c b/vm.c
index b0ea81da31..17f34399b9 100644
--- a/vm.c
+++ b/vm.c
@@ -1509,7 +1509,7 @@ rb_binding_add_dynavars(VALUE bindval, rb_binding_t *bind, int dyncount, const I
/* C -> Ruby: block */
-static inline VALUE
+static inline void
invoke_block(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, const struct rb_captured_block *captured, const rb_cref_t *cref, VALUE type, int opt_pc)
{
int arg_size = ISEQ_BODY(iseq)->param.size;
@@ -1521,15 +1521,13 @@ invoke_block(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, cons
ec->cfp->sp + arg_size,
ISEQ_BODY(iseq)->local_table_size - arg_size,
ISEQ_BODY(iseq)->stack_max);
- return vm_exec(ec);
}
-static VALUE
+static inline void
invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, const struct rb_captured_block *captured, const rb_callable_method_entry_t *me, VALUE type, int opt_pc)
{
/* bmethod call from outside the VM */
int arg_size = ISEQ_BODY(iseq)->param.size;
- VALUE ret;
VM_ASSERT(me->def->type == VM_METHOD_TYPE_BMETHOD);
@@ -1542,9 +1540,6 @@ invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, co
ISEQ_BODY(iseq)->stack_max);
VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);
- ret = vm_exec(ec);
-
- return ret;
}
ALWAYS_INLINE(static VALUE
@@ -1591,14 +1586,16 @@ invoke_iseq_block_from_c(rb_execution_context_t *ec, const struct rb_captured_bl
cfp->sp = sp;
if (me == NULL) {
- return invoke_block(ec, iseq, self, captured, cref, type, opt_pc);
+ invoke_block(ec, iseq, self, captured, cref, type, opt_pc);
}
else {
- return invoke_bmethod(ec, iseq, self, captured, me, type, opt_pc);
+ invoke_bmethod(ec, iseq, self, captured, me, type, opt_pc);
}
+
+ return vm_exec(ec);
}
-static inline VALUE
+static VALUE
invoke_block_from_c_bh(rb_execution_context_t *ec, VALUE block_handler,
int argc, const VALUE *argv,
int kw_splat, VALUE passed_block_handler, const rb_cref_t *cref,