diff options
author | John Hawthorn <[email protected]> | 2025-03-05 14:21:48 -0800 |
---|---|---|
committer | Hiroshi SHIBATA <[email protected]> | 2025-03-06 11:05:43 +0900 |
commit | bb6b3c98dc06b92c48928d8c7f942b3dfd990089 (patch) | |
tree | da264108cd693fb010ee7cfca2af92cdf09f32dd | |
parent | 977d711ba103f1d6b9656e891461a1c0e5cb35f8 (diff) |
Replace tombstone when converting AR to ST hash
[Bug #21170]
st_table reserves -1 as a special hash value to indicate that an entry
has been deleted. To still allow the hash function to return -1,
do_hash replaces -1 with 0 so that it is not mistaken for
the sentinel.
Previously, when upgrading an AR table to an ST table,
rb_st_add_direct_with_hash was used, which did not perform the same
conversion. This could lead to a hash in a broken state where one of its
entries, which was supposed to exist, was marked as a tombstone.
The hash could then become further corrupted when the ST table required
resizing, as the falsely tombstoned entry would be skipped but would
still be counted in num_entries, leading to an uninitialized entry at index
15.
In most cases this will be really rare, unless using a very poorly
implemented custom hash function.
This also adds two debug assertions, one that st_add_direct_with_hash
does not receive the reserved hash value, and a second in
rebuild_table_with, which ensures that after we rebuild/compact a table
it contains the expected number of elements.
Co-authored-by: Alan Wu <[email protected]>
-rw-r--r-- | common.mk | 1 | ||||
-rw-r--r-- | st.c | 23 | ||||
-rw-r--r-- | test/ruby/test_hash.rb | 14 |
3 files changed, 32 insertions, 6 deletions
@@ -14894,6 +14894,7 @@ st.$(OBJEXT): {$(VPATH)}internal/variable.h st.$(OBJEXT): {$(VPATH)}internal/warning_push.h st.$(OBJEXT): {$(VPATH)}internal/xmalloc.h st.$(OBJEXT): {$(VPATH)}missing.h +st.$(OBJEXT): {$(VPATH)}ruby_assert.h st.$(OBJEXT): {$(VPATH)}st.c st.$(OBJEXT): {$(VPATH)}st.h st.$(OBJEXT): {$(VPATH)}subst.h @@ -103,11 +103,13 @@ #ifdef NOT_RUBY #include "regint.h" #include "st.h" +#include <assert.h> #else #include "internal.h" #include "internal/bits.h" #include "internal/hash.h" #include "internal/sanitizers.h" +#include "ruby_assert.h" #endif #include <stdio.h> @@ -115,7 +117,6 @@ #include <stdlib.h> #endif #include <string.h> -#include <assert.h> #ifdef __GNUC__ #define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p) @@ -313,17 +314,22 @@ static const struct st_features features[] = { #define RESERVED_HASH_VAL (~(st_hash_t) 0) #define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0) -/* Return hash value of KEY for table TAB. */ static inline st_hash_t -do_hash(st_data_t key, st_table *tab) +normalize_hash_value(st_hash_t hash) { - st_hash_t hash = (st_hash_t)(tab->type->hash)(key); - /* RESERVED_HASH_VAL is used for a deleted entry. Map it into another value. Such mapping should be extremely rare. */ return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash; } +/* Return hash value of KEY for table TAB. */ +static inline st_hash_t +do_hash(st_data_t key, st_table *tab) +{ + st_hash_t hash = (st_hash_t)(tab->type->hash)(key); + return normalize_hash_value(hash); +} + /* Power of 2 defining the minimal number of allocated entries. 
*/ #define MINIMAL_POWER2 2 @@ -764,6 +770,9 @@ rebuild_table_with(st_table *new_tab, st_table *tab) new_tab->num_entries++; ni++; } + + assert(new_tab->num_entries == tab->num_entries); + if (new_tab != tab) { tab->entry_power = new_tab->entry_power; tab->bin_power = new_tab->bin_power; @@ -1146,6 +1155,8 @@ st_add_direct_with_hash(st_table *tab, st_index_t ind; st_index_t bin_ind; + assert(hash != RESERVED_HASH_VAL); + rebuild_table_if_necessary(tab); ind = tab->entries_bound++; entry = &tab->entries[ind]; @@ -1163,7 +1174,7 @@ void rb_st_add_direct_with_hash(st_table *tab, st_data_t key, st_data_t value, st_hash_t hash) { - st_add_direct_with_hash(tab, key, value, hash); + st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash)); } /* Insert (KEY, VALUE) into table TAB. The table should not have diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb index cef9f88a3c..58fbfea1de 100644 --- a/test/ruby/test_hash.rb +++ b/test/ruby/test_hash.rb @@ -2275,4 +2275,18 @@ class TestHash < Test::Unit::TestCase end end; end + + def test_ar_to_st_reserved_value + klass = Class.new do + attr_reader :hash + def initialize(val) = @hash = val + end + + values = 0.downto(-16).to_a + hash = {} + values.each do |val| + hash[klass.new(val)] = val + end + assert_equal values, hash.values, "[ruby-core:121239] [Bug #21170]" + end end |