diff options
author | Jean Boussier <[email protected]> | 2022-02-18 12:54:42 +0100 |
---|---|---|
committer | Jean Boussier <[email protected]> | 2023-02-23 16:01:57 +0100 |
commit | 2a5354e59324cb296a423c73ec15ff9191086964 (patch) | |
tree | 4f05a09a431ee431f1312eef8eb23da4cab7e23a | |
parent | 9406245dbcaa324ce9ff0aae0f28b64beacc0836 (diff) |
Implement ObjectSpace::WeakKeyMap basic allocator
[Feature #18498]
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/5570
-rw-r--r-- | NEWS.md | 6 | ||||
-rw-r--r-- | gc.c | 325 | ||||
-rw-r--r-- | hash.c | 4 | ||||
-rw-r--r-- | internal/hash.h | 2 | ||||
-rw-r--r-- | test/ruby/weakkeymap.rb | 112 |
5 files changed, 447 insertions, 2 deletions
@@ -20,6 +20,12 @@ Note: We're only listing outstanding class updates. * `String#unpack` now raises ArgumentError for unknown directives. [[Bug #19150]] * `String#bytesplice` now accepts new arguments index/length or range of the source string to be copied. [[Feature #19314]] +* ObjectSpace::WeakKeyMap + + * New core class to build collections with weak references. + The class use equality semantic to lookup keys like a regular hash, + but it doesn't hold strong references on the keys. [[Feature #18498]] + ## Stdlib updates The following default gems are updated. @@ -13306,6 +13306,311 @@ wmap_size(VALUE self) #endif } + +/* + ------------------------------ WeakKeyMap ------------------------------ +*/ + +struct weakkeymap_entry { + VALUE obj; + st_index_t hash; +} typedef weakkeymap_entry_t; + +struct weakkeymap { + st_table *map; + st_table *obj2hash; + VALUE final; +}; + +static int +weakkeymap_cmp_entry(st_data_t a, st_data_t b) +{ + struct weakkeymap_entry *entry_a = (struct weakkeymap_entry *)a; + struct weakkeymap_entry *entry_b = (struct weakkeymap_entry *)b; + if (entry_a == entry_b) { + return 0; + } + else { + return rb_any_cmp(entry_a->obj, entry_b->obj); + } +} + +static st_index_t +weakkeymap_hash_entry(st_data_t a) +{ + struct weakkeymap_entry *entry_a = (struct weakkeymap_entry *)a; + return entry_a->hash; +} + +static const struct st_hash_type weakkeymap_hash = { + weakkeymap_cmp_entry, + weakkeymap_hash_entry, +}; + +static void +wkmap_compact(void *ptr) +{ + struct weakkeymap *w = ptr; + if (w->map) rb_gc_update_tbl_refs(w->map); + w->final = rb_gc_location(w->final); +} + +static void +wkmap_mark(void *ptr) +{ + struct weakkeymap *w = ptr; + rb_mark_tbl_no_pin(w->map); + rb_gc_mark_movable(w->final); +} + +static void +wkmap_free(void *ptr) +{ + struct weakkeymap *w = ptr; + st_free_table(w->map); + st_free_table(w->obj2hash); + xfree(w); +} + +static size_t +wkmap_memsize(const void *ptr) +{ + const struct weakkeymap *w = ptr; + return sizeof(struct weakkeymap) + st_memsize(w->map) + st_memsize(w->obj2hash); +} + +static const rb_data_type_t weakkeymap_type = { + "weakkeymap", + { + wkmap_mark, + wkmap_free, + wkmap_memsize, + wkmap_compact, + }, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY +}; + +static VALUE +wkmap_finalize(RB_BLOCK_CALL_FUNC_ARGLIST(objid, self)) +{ + struct weakkeymap *w; + VALUE key; + + TypedData_Get_Struct(self, struct weakkeymap, &weakkeymap_type, w); + + /* Get reference from object id. */ + if ((key = id2ref_obj_tbl(&rb_objspace, objid)) == Qundef) { + rb_bug("wkmap_finalize: objid is not found."); + } + + st_index_t hash; + if (st_delete(w->obj2hash, (st_data_t *)key, &hash)) { + weakkeymap_entry_t lookup_entry = {key, hash}; + weakkeymap_entry_t *deleted_entry = NULL; + if (st_get_key(w->map, (st_data_t)&lookup_entry, (st_data_t *)deleted_entry)) { + st_data_t deleted_value; + st_delete(w->map, (st_data_t *)deleted_entry, &deleted_value); + xfree(deleted_entry); + } + } + + return self; +} + +static VALUE +wkmap_allocate(VALUE klass) +{ + struct weakkeymap *w; + VALUE obj = TypedData_Make_Struct(klass, struct weakkeymap, &weakkeymap_type, w); + w->map = st_init_table(&weakkeymap_hash); + w->obj2hash = rb_init_identtable(); + w->final = rb_func_lambda_new(wkmap_finalize, obj, 1, 1); + return obj; +} + +static st_index_t +wkmap_lookup_hash(struct weakkeymap *w, VALUE key) +{ + st_index_t hash; + if (!st_lookup(w->obj2hash, (st_data_t)key, &hash)) { + hash = rb_any_hash(key); + } + return hash; +} + +static weakkeymap_entry_t* +wkmap_lookup_entry(struct weakkeymap *w, VALUE key, st_index_t hash) +{ + st_data_t data; + weakkeymap_entry_t lookup_entry = {key, hash}; + + if (st_get_key(w->map, (st_data_t)&lookup_entry, &data)) { + return (weakkeymap_entry_t *)data; + } + + return NULL; +} + +static VALUE +wkmap_lookup(VALUE self, VALUE key) +{ + st_data_t data; + struct weakkeymap *w; + TypedData_Get_Struct(self, struct weakkeymap, &weakkeymap_type, w); + + st_index_t hash = rb_any_hash(key); + weakkeymap_entry_t lookup_entry = {key, hash}; + + if (st_lookup(w->map, (st_data_t)&lookup_entry, &data)) { + return (VALUE)data; + } + return Qundef; +} + +/* + * call-seq: + * map[key] -> value + * + * Returns the value associated with the given +key+ if found. + * + * If +key+ is not found, returns +nil+. + */ +static VALUE +wkmap_aref(VALUE self, VALUE key) +{ + VALUE obj = wkmap_lookup(self, key); + return obj != Qundef ? obj : Qnil; +} + +/* + * call-seq: + * map[key] = value -> value + * + * Associates the given +value+ with the given +key+; returns +value+. + * + * The reference to +key+ is weak, so when there is no other reference + * to +key+ it may be garbage collected. + * + * If the given +key+ exists, replaces its value with the given +value+; + * the ordering is not affected + */ +static VALUE +wkmap_aset(VALUE self, VALUE key, VALUE value) +{ + struct weakkeymap *w; + TypedData_Get_Struct(self, struct weakkeymap, &weakkeymap_type, w); + + if (!(FL_ABLE(key) && !SYMBOL_P(key) && !RB_BIGNUM_TYPE_P(key))) { + rb_raise(rb_eArgError, "WeakKeyMap must be garbage collectable"); + } + + st_index_t hash = wkmap_lookup_hash(w, key); + weakkeymap_entry_t *key_entry = wkmap_lookup_entry(w, key, hash); + + if (!key_entry) { + key_entry = ALLOC(weakkeymap_entry_t); + key_entry->obj = key; + key_entry->hash = hash; + } + + if (!st_insert(w->map, (st_data_t)key_entry, (st_data_t)value)) { + st_insert(w->obj2hash, (st_data_t)key, (st_data_t)hash); + define_final0(key, w->final); + } + + return value; +} + +/* + * call-seq: + * map.getkey(key) -> existing_key or nil + * + * Returns the existing equal key if it exists, otherwise returns +nil+. + */ +static VALUE +wkmap_getkey(VALUE self, VALUE key) +{ + struct weakkeymap *w; + TypedData_Get_Struct(self, struct weakkeymap, &weakkeymap_type, w); + + st_index_t hash = rb_any_hash(key); + weakkeymap_entry_t lookup_entry = {key, hash}; + + weakkeymap_entry_t *key_entry = NULL; + if (st_get_key(w->map, (st_data_t)&lookup_entry, (st_data_t *)&key_entry)) { + GC_ASSERT(key_entry != NULL); + + VALUE obj = key_entry->obj; + if (wmap_live_p(&rb_objspace, obj)) { + return obj; + } + } + return Qnil; +} + +/* + * call-seq: + * hash.key?(key) -> true or false + * + * Returns +true+ if +key+ is a key in +self+, otherwise +false+. + */ +static VALUE +wkmap_has_key(VALUE self, VALUE key) +{ + return RBOOL(wkmap_lookup(self, key) != Qundef); +} + +/* + * call-seq: + * map.clear -> self + * + * Removes all map entries; returns +self+. + */ +static VALUE +wkmap_clear(VALUE self) { + struct weakkeymap *w; + TypedData_Get_Struct(self, struct weakkeymap, &weakkeymap_type, w); + if (w->map) { + st_clear(w->map); + } + if (w->obj2hash) { + st_clear(w->obj2hash); + } + return self; +} + +/* + * call-seq: + * map.inspect -> new_string + * + * Returns a new \String containing informations about the map: + + * m = ObjectSpace::WeakKeyMap.new + * m[key] = value + * m.inspect # => "#<ObjectSpace::WeakKeyMap:0x00000001028dcba8 size=1>" + * + */ +static VALUE +wkmap_inspect(VALUE self) +{ + struct weakkeymap *w; + TypedData_Get_Struct(self, struct weakkeymap, &weakkeymap_type, w); + + st_index_t n = 0; + if (w->map) { + n = w->map->num_entries; + } + +#if SIZEOF_ST_INDEX_T <= SIZEOF_LONG + const char * format = "#<%"PRIsVALUE":%p size=%lu>"; +#else + const char * format = "#<%"PRIsVALUE":%p size=%llu>"; +#endif + + VALUE str = rb_sprintf(format, rb_class_name(CLASS_OF(self)), (void *)self, n); + return str; +} + /* ------------------------------ GC profiler ------------------------------ */ @@ -14484,6 +14789,15 @@ rb_gcdebug_remove_stress_to_class(int argc, VALUE *argv, VALUE self) * +lib/weakref.rb+ for the public interface. */ +/* + * Document-class: ObjectSpace::WeakKeyMap + * + * An ObjectSpace::WeakKeyMap object holds references to + * any objects, but objects uses as keys can be garbage collected. + * + * Objects used as values can't be garbage collected until the key is. + */ + /* Document-class: GC::Profiler * * The GC profiler provides access to information on GC runs including time, @@ -14593,6 +14907,17 @@ Init_GC(void) rb_include_module(rb_cWeakMap, rb_mEnumerable); } + { + VALUE rb_cWeakKeyMap = rb_define_class_under(rb_mObjSpace, "WeakKeyMap", rb_cObject); + rb_define_alloc_func(rb_cWeakKeyMap, wkmap_allocate); + rb_define_method(rb_cWeakKeyMap, "[]=", wkmap_aset, 2); + rb_define_method(rb_cWeakKeyMap, "[]", wkmap_aref, 1); + rb_define_method(rb_cWeakKeyMap, "getkey", wkmap_getkey, 1); + rb_define_method(rb_cWeakKeyMap, "key?", wkmap_has_key, 1); + rb_define_method(rb_cWeakKeyMap, "clear", wkmap_clear, 0); + rb_define_method(rb_cWeakKeyMap, "inspect", wkmap_inspect, 0); + } + /* internal methods */ rb_define_singleton_method(rb_mGC, "verify_internal_consistency", gc_verify_internal_consistency_m, 0); rb_define_singleton_method(rb_mGC, "verify_transient_heap_internal_consistency", gc_verify_transient_heap_internal_consistency, 0); @@ -106,7 +106,7 @@ rb_hash_set_ifnone(VALUE hash, VALUE ifnone) return hash; } -static int +int rb_any_cmp(VALUE a, VALUE b) { if (a == b) return 0; @@ -221,7 +221,7 @@ obj_any_hash(VALUE obj) return FIX2LONG(hval); } -static st_index_t +st_index_t rb_any_hash(VALUE a) { return any_hash(a, obj_any_hash); diff --git a/internal/hash.h b/internal/hash.h index 5545ecb855..dff421137a 100644 --- a/internal/hash.h +++ b/internal/hash.h @@ -73,6 +73,8 @@ VALUE rb_hash_default_value(VALUE hash, VALUE key); VALUE rb_hash_set_default_proc(VALUE hash, VALUE proc); long rb_dbl_long_hash(double d); st_table *rb_init_identtable(void); +st_index_t rb_any_hash(VALUE a); +int rb_any_cmp(VALUE a, VALUE b); VALUE rb_to_hash_type(VALUE obj); VALUE rb_hash_key_str(VALUE); VALUE rb_hash_values(VALUE hash); diff --git a/test/ruby/weakkeymap.rb b/test/ruby/weakkeymap.rb new file mode 100644 index 0000000000..20bfe5ec2c --- /dev/null +++ b/test/ruby/weakkeymap.rb @@ -0,0 +1,112 @@ +# frozen_string_literal: false +require 'test/unit' + +class TestWeakKeyMap < Test::Unit::TestCase + def setup + @wm = ObjectSpace::WeakKeyMap.new + end + + def test_map + x = Object.new + k = "foo" + @wm[k] = x + assert_same(x, @wm[k]) + assert_same(x, @wm["FOO".downcase]) + end + + def test_aset_const + x = Object.new + assert_raise(ArgumentError) { @wm[true] = x } + assert_raise(ArgumentError) { @wm[false] = x } + assert_raise(ArgumentError) { @wm[nil] = x } + assert_raise(ArgumentError) { @wm[42] = x } + assert_raise(ArgumentError) { @wm[2**128] = x } + assert_raise(ArgumentError) { @wm[1.23] = x } + assert_raise(ArgumentError) { @wm[:foo] = x } + assert_raise(ArgumentError) { @wm["foo#{rand}".to_sym] = x } + end + + def test_getkey + k = "foo" + @wm[k] = true + assert_same(k, @wm.getkey("FOO".downcase)) + end + + def test_key? + 1.times do + assert_weak_include(:key?, "foo") + end + GC.start + assert_not_send([@wm, :key?, "FOO".downcase]) + end + + def test_clear + k = "foo" + @wm[k] = true + assert @wm[k] + assert_same @wm, @wm.clear + refute @wm[k] + end + + def test_inspect + x = Object.new + k = Object.new + @wm[k] = x + assert_match(/\A\#<#{@wm.class.name}:[\dxa-f]+ size=\d+>\z/, @wm.inspect) + + 1000.times do |i| + @wm[i.to_s] = Object.new + @wm.inspect + end + assert_match(/\A\#<#{@wm.class.name}:[\dxa-f]+ size=\d+>\z/, @wm.inspect) + end + + def test_no_hash_method + k = BasicObject.new + assert_raise NoMethodError do + @wm[k] = 42 + end + end + + def test_frozen_object + o = Object.new.freeze + assert_nothing_raised(FrozenError) {@wm[o] = 'foo'} + assert_nothing_raised(FrozenError) {@wm['foo'] = o} + end + + def test_inconsistent_hash_key + assert_no_memory_leak [], '', <<~RUBY + class BadHash + def initialize + @hash = 0 + end + + def hash + @hash += 1 + end + end + + k = BadHash.new + wm = ObjectSpace::WeakKeyMap.new + + 100_000.times do |i| + wm[k] = i + end + RUBY + end + + private + + def assert_weak_include(m, k, n = 100) + if n > 0 + return assert_weak_include(m, k, n-1) + end + 1.times do + x = Object.new + @wm[k] = x + assert_send([@wm, m, k]) + assert_send([@wm, m, "FOO".downcase]) + x = Object.new + end + end +end |