-rw-r--r-- | internal.h                                | 25
-rw-r--r-- | mjit_compile.c                            |  2
-rw-r--r-- | tool/ruby_vm/loaders/insns_def.rb         |  2
-rw-r--r-- | tool/ruby_vm/views/_mjit_compile_send.erb |  2
-rw-r--r-- | vm_eval.c                                 |  2
-rw-r--r-- | vm_insnhelper.c                           | 61
6 files changed, 75 insertions, 19 deletions
diff --git a/internal.h b/internal.h
index 703dd57699..1b27df0009 100644
--- a/internal.h
+++ b/internal.h
@@ -2357,10 +2357,32 @@ struct rb_execution_context_struct;
 struct rb_control_frame_struct;
 struct rb_calling_info;
 struct rb_call_data;
+/* I have several reasons to chose 64 here:
+ *
+ * - A cache line must be a power-of-two size.
+ * - Setting this to anything less than or equal to 32 boosts nothing.
+ * - I have never seen an architecture that has 128 byte L1 cache line.
+ * - I know Intel Core and Sparc T4 at least uses 64.
+ * - I know jemalloc internally has this exact same `#define CACHE_LINE 64`.
+ *   https://github.com/jemalloc/jemalloc/blob/dev/include/jemalloc/internal/jemalloc_internal_types.h
+ */
+#define CACHELINE 64
 struct rb_call_cache {
     /* inline cache: keys */
     rb_serial_t method_state;
-    rb_serial_t class_serial;
+    rb_serial_t class_serial[
+        (CACHELINE
+         - sizeof(rb_serial_t)                                   /* method_state */
+         - sizeof(struct rb_callable_method_entry_struct *)      /* me */
+         - sizeof(struct rb_callable_method_definition_struct *) /* def */
+         - sizeof(enum method_missing_reason)                    /* aux */
+         - sizeof(VALUE (*)(                                     /* call */
+               struct rb_execution_context_struct *e,
+               struct rb_control_frame_struct *,
+               struct rb_calling_info *,
+               const struct rb_call_data *)))
+        / sizeof(rb_serial_t)
+    ];
 
     /* inline cache: values */
     const struct rb_callable_method_entry_struct *me;
@@ -2377,6 +2399,7 @@ struct rb_call_cache {
         int inc_sp; /* used by cfunc */
     } aux;
 };
+STATIC_ASSERT(cachelined, sizeof(struct rb_call_cache) <= CACHELINE);
 struct rb_call_info {
     /* fixed at compile time */
     ID mid;
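The array bound above makes struct rb_call_cache occupy exactly one cache line: every byte left over after the fixed members is handed to class_serial, and the new STATIC_ASSERT fails the build if a future member pushes the struct past 64 bytes. Below is a minimal standalone sketch of the same sizing trick; the struct and member names are illustrative, not from the Ruby source:

#include <stdint.h>

#define CACHELINE 64

/* Hypothetical cache entry padded out to one cache line: the key
 * array absorbs whatever bytes the fixed members leave over. */
struct cache_entry {
    uint64_t generation;                  /* fixed member #1 */
    uint64_t keys[(CACHELINE
                   - sizeof(uint64_t)     /* generation */
                   - sizeof(void *))      /* value */
                  / sizeof(uint64_t)];    /* -> 6 slots on LP64 */
    void *value;                          /* fixed member #2 */
};

/* Compile-time size check via the negative-array-size idiom, which
 * works even without C11 _Static_assert; it mirrors what
 * STATIC_ASSERT(cachelined, ...) does in the patch. */
typedef char cache_entry_fits_cacheline
    [sizeof(struct cache_entry) <= CACHELINE ? 1 : -1];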
fprintf(f, " if (UNLIKELY(GET_GLOBAL_METHOD_STATE() != %"PRI_SERIALT_PREFIX"u ||\n", cc_copy->method_state); - fprintf(f, " RCLASS_SERIAL(CLASS_OF(stack[%d])) != %"PRI_SERIALT_PREFIX"u)) {\n", b->stack_size - 1 - argc, cc_copy->class_serial); + fprintf(f, " RCLASS_SERIAL(CLASS_OF(stack[%d])) != %"PRI_SERIALT_PREFIX"u)) {\n", b->stack_size - 1 - argc, cc_copy->class_serial[0]); fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos); fprintf(f, " reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size); fprintf(f, " goto send_cancel;\n"); @@ -47,7 +47,7 @@ rb_vm_call0(rb_execution_context_t *ec, VALUE recv, ID id, int argc, const VALUE { struct rb_calling_info calling = { Qundef, recv, argc, kw_splat, }; struct rb_call_info ci = { id, (kw_splat ? VM_CALL_KW_SPLAT : 0), argc, }; - struct rb_call_cache cc = { 0, 0, me, me->def, vm_call_general, { 0, }, }; + struct rb_call_cache cc = { 0, { 0, }, me, me->def, vm_call_general, { 0, }, }; struct rb_call_data cd = { cc, ci, }; return vm_call0_body(ec, &calling, &cd, argv); } diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 5e1cfccf3c..f8be5f6f33 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1422,16 +1422,58 @@ rb_vm_search_method_slowpath(struct rb_call_data *cd, VALUE klass) struct rb_call_cache *cc = &cd->cc; const rb_callable_method_entry_t *me = rb_callable_method_entry(klass, ci->mid); - *cc = (struct rb_call_cache) { + struct rb_call_cache buf = { GET_GLOBAL_METHOD_STATE(), - RCLASS_SERIAL(klass), + { RCLASS_SERIAL(klass) }, me, me ? me->def : NULL, calccall(cd, me), }; + if (buf.call != vm_call_general) { + for (int i = 0; i < numberof(cc->class_serial) - 1; i++) { + buf.class_serial[i + 1] = cc->class_serial[i]; + } + } + MEMCPY(cc, &buf, struct rb_call_cache, 1); VM_ASSERT(callable_method_entry_p(cc->me)); } +static inline bool +vm_cache_check_for_class_serial(struct rb_call_cache *cc, rb_serial_t class_serial) +{ + int i; + rb_serial_t j; + + for (i = 0; i < numberof(cc->class_serial); i++) { + j = cc->class_serial[i]; + + if (! j) { + break; + } + else if (j != class_serial) { + continue; + } + else if (! i) { + return true; + } + else { + goto hit; + } + } + + RB_DEBUG_COUNTER_INC(mc_class_serial_miss); + return false; + + hit: + for (; i > 0; i--) { + cc->class_serial[i] = cc->class_serial[i - 1]; + } + + cc->class_serial[0] = j; + MEMZERO(&cc->aux, cc->aux, 1); /* cc->call is valid, but cc->aux might not. */ + return true; +} + static void vm_search_method_fastpath(struct rb_call_data *cd, VALUE klass) { @@ -1440,8 +1482,7 @@ vm_search_method_fastpath(struct rb_call_data *cd, VALUE klass) #if OPT_INLINE_METHOD_CACHE if (LIKELY(RB_DEBUG_COUNTER_INC_UNLESS(mc_global_state_miss, GET_GLOBAL_METHOD_STATE() == cc->method_state) && - RB_DEBUG_COUNTER_INC_UNLESS(mc_class_serial_miss, - RCLASS_SERIAL(klass) == cc->class_serial))) { + vm_cache_check_for_class_serial(cc, RCLASS_SERIAL(klass)))) { /* cache hit! 
@@ -1440,8 +1482,7 @@ vm_search_method_fastpath(struct rb_call_data *cd, VALUE klass)
 #if OPT_INLINE_METHOD_CACHE
     if (LIKELY(RB_DEBUG_COUNTER_INC_UNLESS(mc_global_state_miss,
                                            GET_GLOBAL_METHOD_STATE() == cc->method_state) &&
-               RB_DEBUG_COUNTER_INC_UNLESS(mc_class_serial_miss,
-                                           RCLASS_SERIAL(klass) == cc->class_serial))) {
+               vm_cache_check_for_class_serial(cc, RCLASS_SERIAL(klass)))) {
         /* cache hit! */
         VM_ASSERT(cc->call != NULL);
         RB_DEBUG_COUNTER_INC(mc_inline_hit);
@@ -1605,24 +1646,16 @@ opt_eql_func(VALUE recv, VALUE obj, CALL_DATA cd)
 
 VALUE
 rb_equal_opt(VALUE obj1, VALUE obj2)
 {
-    struct rb_call_data cd;
+    struct rb_call_data cd = { .ci = { .mid = idEq, }, };
 
-    cd.ci.mid = idEq;
-    cd.cc.method_state = 0;
-    cd.cc.class_serial = 0;
-    cd.cc.me = NULL;
     return opt_eq_func(obj1, obj2, &cd);
 }
 
 VALUE
 rb_eql_opt(VALUE obj1, VALUE obj2)
 {
-    struct rb_call_data cd;
+    struct rb_call_data cd = { .ci = { .mid = idEqlP, }, };
 
-    cd.ci.mid = idEqlP;
-    cd.cc.method_state = 0;
-    cd.cc.class_serial = 0;
-    cd.cc.me = NULL;
     return opt_eql_func(obj1, obj2, &cd);
 }
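The rb_equal_opt/rb_eql_opt rewrite relies on a C99 guarantee: members not named in a designated initializer are zero-initialized, so the removed field-by-field stores were redundant, and the call sites no longer depend on the exact shape of class_serial. A small self-contained sketch of that guarantee, using illustrative stand-ins for the Ruby structs:

#include <assert.h>
#include <string.h>

struct cache { unsigned long method_state; unsigned long class_serial[6]; };
struct call_data { struct cache cc; int mid; };

int
main(void)
{
    /* Only .mid is named; every other member -- the whole cc,
     * including the class_serial array -- is zero-initialized
     * (C99 6.7.8p21), so explicit cd.cc.* = 0 stores are redundant. */
    struct call_data cd = { .mid = 42, };

    struct cache zeroes;
    memset(&zeroes, 0, sizeof zeroes);
    assert(memcmp(&cd.cc, &zeroes, sizeof zeroes) == 0);
    return 0;
}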