diff options
author | Alan Wu <[email protected]> | 2019-07-30 21:36:05 -0400 |
---|---|---|
committer | 卜部昌平 <[email protected]> | 2019-10-24 18:03:42 +0900 |
commit | 89e7997622038f82115f34dbb4ea382e02bed163 (patch) | |
tree | 993a5f6fb17418381e835be1fd51093dc620148a /vm_core.h | |
parent | 38e931fa2ceac6d922f3eabedb8f35f211de0bdb (diff) |
Combine call info and cache to speed up method invocation
To perform a regular method call, the VM needs two structs,
`rb_call_info` and `rb_call_cache`. At the moment, we allocate these two
structures in separate buffers. In the worst case, the CPU needs to read
4 cache lines to complete a method call. Putting the two structures
together reduces the maximum number of cache line reads to 2.
Combining the structures also saves 8 bytes per call site as the current
layout uses two separate pointers for the call info and the call cache.
This saves about 2 MiB on Discourse.
This change improves the Optcarrot benchmark by at least 3%. For more
details, see the attached bugs.ruby-lang.org ticket.
Complications:
- A new instruction attribute `comptime_sp_inc` is introduced to
calculate SP increase at compile time without using call caches. At
compile time, a `TS_CALLDATA` operand points to a call info struct, but
at runtime, the same operand points to a call data struct. Instructions
that explicitly define `sp_inc` also need to define `comptime_sp_inc`.
- MJIT code for copying call cache becomes slightly more complicated.
- This changes the bytecode format, which might break existing tools.
[Misc #16258]
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/2564
Diffstat (limited to 'vm_core.h')
-rw-r--r-- | vm_core.h | 23 |
1 files changed, 17 insertions, 6 deletions
@@ -258,6 +258,16 @@ struct rb_calling_info { int kw_splat; }; +struct rb_call_data { + struct rb_call_cache cc; + struct rb_call_info ci; +}; + +struct rb_kwarg_call_data { + struct rb_call_cache cc; + struct rb_call_info_with_kwarg ci_kw; +}; + struct rb_execution_context_struct; typedef VALUE (*vm_call_handler)(struct rb_execution_context_struct *ec, struct rb_control_frame_struct *cfp, struct rb_calling_info *calling, const struct rb_call_info *ci, struct rb_call_cache *cc); @@ -417,12 +427,12 @@ struct rb_iseq_constant_body { struct rb_iseq_struct *local_iseq; /* local_iseq->flip_cnt can be modified */ union iseq_inline_storage_entry *is_entries; - struct rb_call_info *ci_entries; /* struct rb_call_info ci_entries[ci_size]; - * struct rb_call_info_with_kwarg cikw_entries[ci_kw_size]; - * So that: - * struct rb_call_info_with_kwarg *cikw_entries = &body->ci_entries[ci_size]; - */ - struct rb_call_cache *cc_entries; /* size is ci_size + ci_kw_size */ + struct rb_call_data *call_data; /* A buffer for two arrays: + * struct rb_call_data calls[ci_size]; + * struct rb_kwarg_call_data kw_calls[ci_kw_size]; + * Such that: + * struct rb_kwarg_call_data *kw_calls = &body->call_data[ci_size]; + */ struct { rb_snum_t flip_count; @@ -1121,6 +1131,7 @@ typedef struct iseq_inline_cache_entry *IC; typedef union iseq_inline_storage_entry *ISE; typedef struct rb_call_info *CALL_INFO; typedef struct rb_call_cache *CALL_CACHE; +typedef struct rb_call_data *CALL_DATA; void rb_vm_change_state(void); |