diff options
author | John Hawthorn <[email protected]> | 2021-09-28 19:13:24 -0700 |
---|---|---|
committer | Aaron Patterson <[email protected]> | 2021-09-30 13:06:53 -0700 |
commit | bb488a1a7f844bb4f2b9fd561b29a0560a7bf06a (patch) | |
tree | 559e0a301906ca542427db626ee230573a68890b | |
parent | 529fc204af84f825f98f83c34b004acbaa802615 (diff) |
Use faster any_hash logic in rb_hash
From the documentation of rb_obj_hash:
> Certain core classes such as Integer use built-in hash calculations and
> do not call the #hash method when used as a hash key.
So if you override, say, Integer#hash it won't be used from rb_hash_aref
and similar. This avoids method lookups in many common cases.
This commit uses the same optimization in rb_hash, a method used
internally and in the C API to get the hash value of an object. Usually
this is used to build the hash of an object based on its elements.
Previously it would always do a method lookup for 'hash'.
This is primarily intended to speed up hashing of Arrays and Hashes,
which call rb_hash for each element.
compare-ruby: ruby 3.0.1p64 (2021-04-05 revision 0fb782ee38) [x86_64-linux]
built-ruby: ruby 3.1.0dev (2021-09-29T02:13:24Z fast_hash d670bf88b2) [x86_64-linux]
# Iteration per second (i/s)
| |compare-ruby|built-ruby|
|:----------------|-----------:|---------:|
|hash_aref_array | 1.008| 1.769|
| | -| 1.76x|
Notes:
Merged: https://github.com/ruby/ruby/pull/4916
-rw-r--r-- | benchmark/hash_aref_array.rb | 5 | ||||
-rw-r--r-- | hash.c | 59 |
2 files changed, 35 insertions, 29 deletions
diff --git a/benchmark/hash_aref_array.rb b/benchmark/hash_aref_array.rb
new file mode 100644
index 0000000000..ac7a683d95
--- /dev/null
+++ b/benchmark/hash_aref_array.rb
@@ -0,0 +1,5 @@
+h = {}
+arrays = (0..99).each_slice(10).to_a
+#STDERR.puts arrays.inspect
+arrays.each { |s| h[s] = s }
+200_000.times { arrays.each { |s| h[s] } }
diff --git a/hash.c b/hash.c
--- a/hash.c
+++ b/hash.c
@@ -122,33 +122,6 @@ hash_recursive(VALUE obj, VALUE arg, int recurse)
     return rb_funcallv(obj, id_hash, 0, 0);
 }
 
-VALUE
-rb_hash(VALUE obj)
-{
-    VALUE hval = rb_check_funcall_basic_kw(obj, id_hash, rb_mKernel, 0, 0, 0);
-
-    if (hval == Qundef) {
-        hval = rb_exec_recursive_outer(hash_recursive, obj, 0);
-    }
-
-    while (!FIXNUM_P(hval)) {
-        if (RB_BIGNUM_TYPE_P(hval)) {
-            int sign;
-            unsigned long ul;
-            sign = rb_integer_pack(hval, &ul, 1, sizeof(ul), 0,
-                                   INTEGER_PACK_NATIVE_BYTE_ORDER);
-            if (sign < 0) {
-                hval = LONG2FIX(ul | FIXNUM_MIN);
-            }
-            else {
-                hval = LONG2FIX(ul & FIXNUM_MAX);
-            }
-        }
-        hval = rb_to_int(hval);
-    }
-    return hval;
-}
-
 static long rb_objid_hash(st_index_t index);
 
 static st_index_t
@@ -216,8 +189,29 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
 static st_index_t
 obj_any_hash(VALUE obj)
 {
-    obj = rb_hash(obj);
-    return FIX2LONG(obj);
+    VALUE hval = rb_check_funcall_basic_kw(obj, id_hash, rb_mKernel, 0, 0, 0);
+
+    if (hval == Qundef) {
+        hval = rb_exec_recursive_outer(hash_recursive, obj, 0);
+    }
+
+    while (!FIXNUM_P(hval)) {
+        if (RB_TYPE_P(hval, T_BIGNUM)) {
+            int sign;
+            unsigned long ul;
+            sign = rb_integer_pack(hval, &ul, 1, sizeof(ul), 0,
+                                   INTEGER_PACK_NATIVE_BYTE_ORDER);
+            if (sign < 0) {
+                hval = LONG2FIX(ul | FIXNUM_MIN);
+            }
+            else {
+                hval = LONG2FIX(ul & FIXNUM_MAX);
+            }
+        }
+        hval = rb_to_int(hval);
+    }
+
+    return FIX2LONG(hval);
 }
 
 static st_index_t
@@ -226,6 +220,13 @@ rb_any_hash(VALUE a)
     return any_hash(a, obj_any_hash);
 }
 
+VALUE
+rb_hash(VALUE obj)
+{
+    return LONG2FIX(any_hash(obj, obj_any_hash));
+}
+
+
 /* Here is a hash function for 64-bit key.
    It is about 5 times faster (2 times faster when uint128 type is absent)
    on Haswell than tailored Spooky or City hash function can be. */