summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: John Hawthorn <[email protected]> 2025-03-05 14:21:48 -0800
committer: Hiroshi SHIBATA <[email protected]> 2025-03-06 11:05:43 +0900
commit: bb6b3c98dc06b92c48928d8c7f942b3dfd990089 (patch)
tree: da264108cd693fb010ee7cfca2af92cdf09f32dd
parent: 977d711ba103f1d6b9656e891461a1c0e5cb35f8 (diff)
Replace tombstone when converting AR to ST hash
[Bug #21170] st_table reserves -1 as a special hash value to indicate that an entry has been deleted. So that -1 remains a valid value to be returned from the hash function, do_hash replaces -1 with 0 so that it is not mistaken for the sentinel. Previously, when upgrading an AR table to an ST table, rb_st_add_direct_with_hash was used, which did not perform the same conversion. This could leave a hash in a broken state where one of its entries, which was supposed to exist, was marked as a tombstone. The hash could then become further corrupted when the ST table required resizing, as the falsely tombstoned entry would be skipped but would still be counted in num_entries, leading to an uninitialized entry at index 15. In most cases this will be really rare, unless using a very poorly implemented custom hash function. This also adds two debug assertions: one that st_add_direct_with_hash does not receive the reserved hash value, and a second in rebuild_table_with, which ensures that after we rebuild/compact a table it contains the expected number of elements. Co-authored-by: Alan Wu <[email protected]>
-rw-r--r-- common.mk 1
-rw-r--r-- st.c 23
-rw-r--r-- test/ruby/test_hash.rb 14
3 files changed, 32 insertions, 6 deletions
diff --git a/common.mk b/common.mk
index cddc6dc186..a4c9c94a73 100644
--- a/common.mk
+++ b/common.mk
@@ -14894,6 +14894,7 @@ st.$(OBJEXT): {$(VPATH)}internal/variable.h
st.$(OBJEXT): {$(VPATH)}internal/warning_push.h
st.$(OBJEXT): {$(VPATH)}internal/xmalloc.h
st.$(OBJEXT): {$(VPATH)}missing.h
+st.$(OBJEXT): {$(VPATH)}ruby_assert.h
st.$(OBJEXT): {$(VPATH)}st.c
st.$(OBJEXT): {$(VPATH)}st.h
st.$(OBJEXT): {$(VPATH)}subst.h
diff --git a/st.c b/st.c
index eca7c5c3bb..d9a588db8a 100644
--- a/st.c
+++ b/st.c
@@ -103,11 +103,13 @@
#ifdef NOT_RUBY
#include "regint.h"
#include "st.h"
+#include <assert.h>
#else
#include "internal.h"
#include "internal/bits.h"
#include "internal/hash.h"
#include "internal/sanitizers.h"
+#include "ruby_assert.h"
#endif
#include <stdio.h>
@@ -115,7 +117,6 @@
#include <stdlib.h>
#endif
#include <string.h>
-#include <assert.h>
#ifdef __GNUC__
#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
@@ -313,17 +314,22 @@ static const struct st_features features[] = {
#define RESERVED_HASH_VAL (~(st_hash_t) 0)
#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
-/* Return hash value of KEY for table TAB. */
static inline st_hash_t
-do_hash(st_data_t key, st_table *tab)
+normalize_hash_value(st_hash_t hash)
{
- st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
-
/* RESERVED_HASH_VAL is used for a deleted entry. Map it into
another value. Such mapping should be extremely rare. */
return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
}
+/* Return hash value of KEY for table TAB. */
+static inline st_hash_t
+do_hash(st_data_t key, st_table *tab)
+{
+ st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
+ return normalize_hash_value(hash);
+}
+
/* Power of 2 defining the minimal number of allocated entries. */
#define MINIMAL_POWER2 2
@@ -764,6 +770,9 @@ rebuild_table_with(st_table *new_tab, st_table *tab)
new_tab->num_entries++;
ni++;
}
+
+ assert(new_tab->num_entries == tab->num_entries);
+
if (new_tab != tab) {
tab->entry_power = new_tab->entry_power;
tab->bin_power = new_tab->bin_power;
@@ -1146,6 +1155,8 @@ st_add_direct_with_hash(st_table *tab,
st_index_t ind;
st_index_t bin_ind;
+ assert(hash != RESERVED_HASH_VAL);
+
rebuild_table_if_necessary(tab);
ind = tab->entries_bound++;
entry = &tab->entries[ind];
@@ -1163,7 +1174,7 @@ void
rb_st_add_direct_with_hash(st_table *tab,
st_data_t key, st_data_t value, st_hash_t hash)
{
- st_add_direct_with_hash(tab, key, value, hash);
+ st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
}
/* Insert (KEY, VALUE) into table TAB. The table should not have
diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb
index cef9f88a3c..58fbfea1de 100644
--- a/test/ruby/test_hash.rb
+++ b/test/ruby/test_hash.rb
@@ -2275,4 +2275,18 @@ class TestHash < Test::Unit::TestCase
end
end;
end
+
+ def test_ar_to_st_reserved_value
+ klass = Class.new do
+ attr_reader :hash
+ def initialize(val) = @hash = val
+ end
+
+ values = 0.downto(-16).to_a
+ hash = {}
+ values.each do |val|
+ hash[klass.new(val)] = val
+ end
+ assert_equal values, hash.values, "[ruby-core:121239] [Bug #21170]"
+ end
end