diff options
author | 卜部昌平 <[email protected]> | 2019-10-07 12:59:57 +0900 |
---|---|---|
committer | 卜部昌平 <[email protected]> | 2019-11-07 17:41:30 +0900 |
commit | d45a013a1a3bcc860e6f7f303220b3297e2abdbc (patch) | |
tree | 9dd459d6b41542cdfcd75cede71a96e06981e3e0 /internal.h | |
parent | 3c252651e1ee28d015dbe1648dfdf0140232b733 (diff) |
extend rb_call_cache
Prior to this changeset, the majority of inline cache mishits resulted
in the same method entry when rb_callable_method_entry() resolved
a method search. Let's not call the function in the first place in
such situations.
In doing so we extend the struct rb_call_cache from 44 bytes (in
the case of a 64-bit machine) to 64 bytes, and fill the gap with
secondary class serial(s). The call cache's class serials now behave
as an LRU cache.
Calculating -------------------------------------
ours 2.7 2.6
vm2_poly_same_method 2.339M 1.744M 1.369M i/s - 6.000M times in 2.565086s 3.441329s 4.381386s
Comparison:
vm2_poly_same_method
ours: 2339103.0 i/s
2.7: 1743512.3 i/s - 1.34x slower
2.6: 1369429.8 i/s - 1.71x slower
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/2583
Diffstat (limited to 'internal.h')
-rw-r--r-- | internal.h | 25 |
1 files changed, 24 insertions, 1 deletions
diff --git a/internal.h b/internal.h index 703dd57699..1b27df0009 100644 --- a/internal.h +++ b/internal.h @@ -2357,10 +2357,32 @@ struct rb_execution_context_struct; struct rb_control_frame_struct; struct rb_calling_info; struct rb_call_data; +/* I have several reasons to chose 64 here: + * + * - A cache line must be a power-of-two size. + * - Setting this to anything less than or equal to 32 boosts nothing. + * - I have never seen an architecture that has 128 byte L1 cache line. + * - I know Intel Core and Sparc T4 at least uses 64. + * - I know jemalloc internally has this exact same `#define CACHE_LINE 64`. + * https://github.com/jemalloc/jemalloc/blob/dev/include/jemalloc/internal/jemalloc_internal_types.h + */ +#define CACHELINE 64 struct rb_call_cache { /* inline cache: keys */ rb_serial_t method_state; - rb_serial_t class_serial; + rb_serial_t class_serial[ + (CACHELINE + - sizeof(rb_serial_t) /* method_state */ + - sizeof(struct rb_callable_method_entry_struct *) /* me */ + - sizeof(struct rb_callable_method_definition_struct *) /* def */ + - sizeof(enum method_missing_reason) /* aux */ + - sizeof(VALUE (*)( /* call */ + struct rb_execution_context_struct *e, + struct rb_control_frame_struct *, + struct rb_calling_info *, + const struct rb_call_data *))) + / sizeof(rb_serial_t) + ]; /* inline cache: values */ const struct rb_callable_method_entry_struct *me; @@ -2377,6 +2399,7 @@ struct rb_call_cache { int inc_sp; /* used by cfunc */ } aux; }; +STATIC_ASSERT(cachelined, sizeof(struct rb_call_cache) <= CACHELINE); struct rb_call_info { /* fixed at compile time */ ID mid; |