diff options
Diffstat (limited to 'set.c')
-rw-r--r-- | set.c | 2138 |
1 files changed, 2138 insertions, 0 deletions
@@ -0,0 +1,2138 @@ +/* This implements sets using the same hash table implementation as in + st.c, but without a value for each hash entry. This results in the + same basic performance characteristics as when using an st table, + but uses 1/3 less memory. + */ + +#include "id.h" +#include "internal.h" +#include "internal/bits.h" +#include "internal/hash.h" +#include "internal/proc.h" +#include "internal/sanitizers.h" +#include "internal/set_table.h" +#include "internal/symbol.h" +#include "internal/variable.h" +#include "ruby_assert.h" + +#include <stdio.h> +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#include <string.h> + +#ifndef SET_DEBUG +#define SET_DEBUG 0 +#endif + +#if SET_DEBUG +#include "internal/gc.h" +#endif + +static st_index_t +dbl_to_index(double d) +{ + union {double d; st_index_t i;} u; + u.d = d; + return u.i; +} + +static const uint64_t prime1 = ((uint64_t)0x2e0bb864 << 32) | 0xe9ea7df5; +static const uint32_t prime2 = 0x830fcab9; + +static inline uint64_t +mult_and_mix(uint64_t m1, uint64_t m2) +{ +#if defined HAVE_UINT128_T + uint128_t r = (uint128_t) m1 * (uint128_t) m2; + return (uint64_t) (r >> 64) ^ (uint64_t) r; +#else + uint64_t hm1 = m1 >> 32, hm2 = m2 >> 32; + uint64_t lm1 = m1, lm2 = m2; + uint64_t v64_128 = hm1 * hm2; + uint64_t v32_96 = hm1 * lm2 + lm1 * hm2; + uint64_t v1_32 = lm1 * lm2; + + return (v64_128 + (v32_96 >> 32)) ^ ((v32_96 << 32) + v1_32); +#endif +} + +static inline uint64_t +key64_hash(uint64_t key, uint32_t seed) +{ + return mult_and_mix(key + seed, prime1); +} + +/* Should cast down the result for each purpose */ +#define set_index_hash(index) key64_hash(rb_hash_start(index), prime2) + +static st_index_t +set_ident_hash(st_data_t n) +{ +#ifdef USE_FLONUM /* RUBY */ + /* + * - flonum (on 64-bit) is pathologically bad, mix the actual + * float value in, but do not use the float value as-is since + * many integers get interpreted as 2.0 or -2.0 [Bug #10761] + */ + if (FLONUM_P(n)) { + n ^= dbl_to_index(rb_float_value(n)); + } +#endif + + return (st_index_t)set_index_hash((st_index_t)n); +} + +static const struct st_hash_type identhash = { + rb_st_numcmp, + set_ident_hash, +}; + +static const struct st_hash_type objhash = { + rb_any_cmp, + rb_any_hash, +}; + +VALUE rb_cSet; + +#define id_each idEach +static ID id_each_entry; +static ID id_any_p; +static ID id_new; +static ID id_set_iter_lev; + +#define RSET_INITIALIZED FL_USER1 +#define RSET_LEV_MASK (FL_USER13 | FL_USER14 | FL_USER15 | /* FL 13..19 */ \ + FL_USER16 | FL_USER17 | FL_USER18 | FL_USER19) +#define RSET_LEV_SHIFT (FL_USHIFT + 13) +#define RSET_LEV_MAX 127 /* 7 bits */ + +#define SET_ASSERT(expr) RUBY_ASSERT_MESG_WHEN(SET_DEBUG, expr, #expr) + +#define RSET_SIZE(set) set_table_size(RSET_TABLE(set)) +#define RSET_EMPTY(set) (RSET_SIZE(set) == 0) +#define RSET_SIZE_NUM(set) SIZET2NUM(RSET_SIZE(set)) +#define RSET_IS_MEMBER(sobj, item) set_lookup(RSET_TABLE(set), (st_data_t)(item)) +#define RSET_COMPARE_BY_IDENTITY(set) (RSET_TABLE(set)->type == &identhash) + +struct set_object { + set_table table; +}; + +static int +mark_key(st_data_t key, st_data_t data) +{ + rb_gc_mark_movable((VALUE)key); + + return ST_CONTINUE; +} + +static void +set_mark(void *ptr) +{ + struct set_object *sobj = ptr; + if (sobj->table.entries) set_foreach(&sobj->table, mark_key, 0); +} + +static void +set_free_embedded(struct set_object *sobj) +{ + free((&sobj->table)->bins); + free((&sobj->table)->entries); +} + +static void +set_free(void *ptr) +{ + struct set_object *sobj = ptr; + set_free_embedded(sobj); + memset(&sobj->table, 0, sizeof(sobj->table)); +} + +static size_t +set_size(const void *ptr) +{ + const struct set_object *sobj = ptr; + /* Do not count the table size twice, as it is embedded */ + return (unsigned long)set_memsize(&sobj->table) - sizeof(sobj->table); +} + +static int +set_foreach_replace(st_data_t key, st_data_t argp, int error) +{ + if (rb_gc_location((VALUE)key) != (VALUE)key) { + return ST_REPLACE; + } + + return ST_CONTINUE; +} + +static int +set_replace_ref(st_data_t *key, st_data_t argp, int existing) +{ + if (rb_gc_location((VALUE)*key) != (VALUE)*key) { + *key = rb_gc_location((VALUE)*key); + } + + return ST_CONTINUE; +} + +static void +set_compact(void *ptr) +{ + struct set_object *sobj = ptr; + set_compact_table(&sobj->table); + set_foreach_with_replace(&sobj->table, set_foreach_replace, set_replace_ref, 0); +} + +static const rb_data_type_t set_data_type = { + .wrap_struct_name = "set", + .function = { + .dmark = set_mark, + .dfree = set_free, + .dsize = set_size, + .dcompact = set_compact, + }, + .flags = RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE +}; + +static inline set_table * +RSET_TABLE(VALUE set) +{ + struct set_object *sobj; + TypedData_Get_Struct(set, struct set_object, &set_data_type, sobj); + return &sobj->table; +} + +static unsigned long +iter_lev_in_ivar(VALUE set) +{ + VALUE levval = rb_ivar_get(set, id_set_iter_lev); + SET_ASSERT(FIXNUM_P(levval)); + long lev = FIX2LONG(levval); + SET_ASSERT(lev >= 0); + return (unsigned long)lev; +} + +void rb_ivar_set_internal(VALUE obj, ID id, VALUE val); + +static void +iter_lev_in_ivar_set(VALUE set, unsigned long lev) +{ + SET_ASSERT(lev >= RSET_LEV_MAX); + SET_ASSERT(POSFIXABLE(lev)); /* POSFIXABLE means fitting to long */ + rb_ivar_set_internal(set, id_set_iter_lev, LONG2FIX((long)lev)); +} + +static inline unsigned long +iter_lev_in_flags(VALUE set) +{ + return (unsigned long)((RBASIC(set)->flags >> RSET_LEV_SHIFT) & RSET_LEV_MAX); +} + +static inline void +iter_lev_in_flags_set(VALUE set, unsigned long lev) +{ + SET_ASSERT(lev <= RSET_LEV_MAX); + RBASIC(set)->flags = ((RBASIC(set)->flags & ~RSET_LEV_MASK) | ((VALUE)lev << RSET_LEV_SHIFT)); +} + +static inline bool +set_iterating_p(VALUE set) +{ + return iter_lev_in_flags(set) > 0; +} + +static void +set_iter_lev_inc(VALUE set) +{ + unsigned long lev = iter_lev_in_flags(set); + if (lev == RSET_LEV_MAX) { + lev = iter_lev_in_ivar(set) + 1; + if (!POSFIXABLE(lev)) { /* paranoiac check */ + rb_raise(rb_eRuntimeError, "too much nested iterations"); + } + } + else { + lev += 1; + iter_lev_in_flags_set(set, lev); + if (lev < RSET_LEV_MAX) return; + } + iter_lev_in_ivar_set(set, lev); +} + +static void +set_iter_lev_dec(VALUE set) +{ + unsigned long lev = iter_lev_in_flags(set); + if (lev == RSET_LEV_MAX) { + lev = iter_lev_in_ivar(set); + if (lev > RSET_LEV_MAX) { + iter_lev_in_ivar_set(set, lev-1); + return; + } + rb_attr_delete(set, id_set_iter_lev); + } + else if (lev == 0) { + rb_raise(rb_eRuntimeError, "iteration level underflow"); + } + iter_lev_in_flags_set(set, lev - 1); +} + +static VALUE +set_foreach_ensure(VALUE set) +{ + set_iter_lev_dec(set); + return 0; +} + +typedef int set_foreach_func(VALUE, VALUE); + +struct set_foreach_arg { + VALUE set; + set_foreach_func *func; + VALUE arg; +}; + +static int +set_iter_status_check(int status) +{ + if (status == ST_CONTINUE) { + return ST_CHECK; + } + + return status; +} + +static int +set_foreach_iter(st_data_t key, st_data_t argp, int error) +{ + struct set_foreach_arg *arg = (struct set_foreach_arg *)argp; + + if (error) return ST_STOP; + + set_table *tbl = RSET_TABLE(arg->set); + int status = (*arg->func)((VALUE)key, arg->arg); + + if (RSET_TABLE(arg->set) != tbl) { + rb_raise(rb_eRuntimeError, "reset occurred during iteration"); + } + + return set_iter_status_check(status); +} + +static VALUE +set_foreach_call(VALUE arg) +{ + VALUE set = ((struct set_foreach_arg *)arg)->set; + int ret = 0; + ret = set_foreach_check(RSET_TABLE(set), set_foreach_iter, + (st_data_t)arg, (st_data_t)Qundef); + if (ret) { + rb_raise(rb_eRuntimeError, "ret: %d, set modified during iteration", ret); + } + return Qnil; +} + +static void +set_iter(VALUE set, set_foreach_func *func, VALUE farg) +{ + struct set_foreach_arg arg; + + if (RSET_EMPTY(set)) + return; + arg.set = set; + arg.func = func; + arg.arg = farg; + if (RB_OBJ_FROZEN(set)) { + set_foreach_call((VALUE)&arg); + } + else { + set_iter_lev_inc(set); + rb_ensure(set_foreach_call, (VALUE)&arg, set_foreach_ensure, set); + } +} + +NORETURN(static void no_new_item(void)); +static void +no_new_item(void) +{ + rb_raise(rb_eRuntimeError, "can't add a new item into set during iteration"); +} + +static void +set_compact_after_delete(VALUE set) +{ + if (!set_iterating_p(set)) { + set_compact_table(RSET_TABLE(set)); + } +} + +static int +set_table_insert_wb(set_table *tab, VALUE set, VALUE key, VALUE *key_addr) +{ + if (tab->type != &identhash && rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) { + key = rb_hash_key_str(key); + if (key_addr) *key_addr = key; + } + int ret = set_insert(tab, (st_data_t)key); + if (ret == 0) RB_OBJ_WRITTEN(set, Qundef, key); + return ret; +} + +static int +set_insert_wb(VALUE set, VALUE key, VALUE *key_addr) +{ + return set_table_insert_wb(RSET_TABLE(set), set, key, key_addr); +} + +static VALUE +set_alloc_with_size(VALUE klass, st_index_t size) +{ + VALUE set; + struct set_object *sobj; + + set = TypedData_Make_Struct(klass, struct set_object, &set_data_type, sobj); + set_init_table_with_size(&sobj->table, &objhash, size); + + return set; +} + + +static VALUE +set_s_alloc(VALUE klass) +{ + return set_alloc_with_size(klass, 0); +} + +static VALUE +set_s_create(int argc, VALUE *argv, VALUE klass) +{ + VALUE set = set_alloc_with_size(klass, argc); + set_table *table = RSET_TABLE(set); + int i; + + for (i=0; i < argc; i++) { + set_table_insert_wb(table, set, argv[i], NULL); + } + + return set; +} + +static void +check_set(VALUE arg) +{ + if (!rb_obj_is_kind_of(arg, rb_cSet)) { + rb_raise(rb_eArgError, "value must be a set"); + } +} + +static ID +enum_method_id(VALUE other) +{ + if (rb_respond_to(other, id_each_entry)) { + return id_each_entry; + } + else if (rb_respond_to(other, id_each)) { + return id_each; + } + else { + rb_raise(rb_eArgError, "value must be enumerable"); + } +} + +static VALUE +set_enum_size(VALUE set, VALUE args, VALUE eobj) +{ + return RSET_SIZE_NUM(set); +} + +static VALUE +set_initialize_without_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, set)) +{ + VALUE element = i; + set_insert_wb(set, element, &element); + return element; +} + +static VALUE +set_initialize_with_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, set)) +{ + VALUE element = rb_yield(i); + set_insert_wb(set, element, &element); + return element; +} + +/* + * call-seq: + * Set.new -> new_set + * Set.new(enum) -> new_set + * Set.new(enum) { |elem| ... } -> new_set + * + * Creates a new set containing the elements of the given enumerable + * object. + * + * If a block is given, the elements of enum are preprocessed by the + * given block. + * + * Set.new([1, 2]) #=> #<Set: {1, 2}> + * Set.new([1, 2, 1]) #=> #<Set: {1, 2}> + * Set.new([1, 'c', :s]) #=> #<Set: {1, "c", :s}> + * Set.new(1..5) #=> #<Set: {1, 2, 3, 4, 5}> + * Set.new([1, 2, 3]) { |x| x * x } #=> #<Set: {1, 4, 9}> + */ +static VALUE +set_i_initialize(int argc, VALUE *argv, VALUE set) +{ + if (RBASIC(set)->flags & RSET_INITIALIZED) { + rb_raise(rb_eRuntimeError, "cannot reinitialize set"); + } + RBASIC(set)->flags |= RSET_INITIALIZED; + + VALUE other; + rb_check_arity(argc, 0, 1); + + if (argc > 0 && (other = argv[0]) != Qnil) { + if (RB_TYPE_P(other, T_ARRAY)) { + long len = RARRAY_LEN(other); + if (RARRAY_LEN(other) != 0) { + set_table *into = RSET_TABLE(set); + VALUE key; + int block_given = rb_block_given_p(); + RARRAY_PTR_USE(other, ptr, { + for(; len > 0; len--, ptr++) { + key = *ptr; + if (block_given) key = rb_yield(key); + set_table_insert_wb(into, set, key, NULL); + } + }); + } + } + else { + rb_block_call(other, enum_method_id(other), 0, 0, + rb_block_given_p() ? set_initialize_with_block : set_initialize_without_block, + set); + } + } + + return set; +} + +static VALUE +set_i_initialize_copy(VALUE set, VALUE other) +{ + if (set == other) return set; + + if (set_iterating_p(set)) { + rb_raise(rb_eRuntimeError, "cannot replace set during iteration"); + } + + struct set_object *sobj; + TypedData_Get_Struct(set, struct set_object, &set_data_type, sobj); + + set_free_embedded(sobj); + set_copy(&sobj->table, RSET_TABLE(other)); + + return set; +} + +static int +set_inspect_i(st_data_t key, st_data_t arg) +{ + VALUE str = (VALUE)arg; + if (RSTRING_LEN(str) > 8) { + rb_str_buf_cat_ascii(str, ", "); + } + rb_str_buf_append(str, rb_inspect((VALUE)key)); + + return ST_CONTINUE; +} + +static VALUE +set_inspect(VALUE set, VALUE dummy, int recur) +{ + VALUE str; + + if (recur) return rb_usascii_str_new2("#<Set: {...}>"); + str = rb_str_buf_new2("#<Set: {"); + set_iter(set, set_inspect_i, str); + rb_str_buf_cat2(str, "}>"); + + return str; +} + +/* + * call-seq: + * inspect -> new_string + * + * Returns a new string containing the set entries: + * + * s = Set.new + * s.inspect # => "#<Set: {}>" + * s.add(1) + * s.inspect # => "#<Set: {1}>" + * s.add(2) + * s.inspect # => "#<Set: {1, 2}>" + * + * Related: see {Methods for Converting}[rdoc-ref:Set@Methods+for+Converting]. + */ +static VALUE +set_i_inspect(VALUE set) +{ + return rb_exec_recursive(set_inspect, set, 0); +} + +static int +set_to_a_i(st_data_t key, st_data_t arg) +{ + rb_ary_push((VALUE)arg, (VALUE)key); + return ST_CONTINUE; +} + +/* + * call-seq: + * to_a -> array + * + * Returns an array containing all elements in the set. + * + * Set[1, 2].to_a #=> [1, 2] + * Set[1, 'c', :s].to_a #=> [1, "c", :s] + */ +static VALUE +set_i_to_a(VALUE set) +{ + st_index_t size = RSET_SIZE(set); + VALUE ary = rb_ary_new_capa(size); + + if (size == 0) return ary; + + if (ST_DATA_COMPATIBLE_P(VALUE)) { + RARRAY_PTR_USE(ary, ptr, { + size = set_keys(RSET_TABLE(set), ptr, size); + }); + rb_gc_writebarrier_remember(ary); + rb_ary_set_len(ary, size); + } + else { + set_iter(set, set_to_a_i, (st_data_t)ary); + } + return ary; +} + +/* + * call-seq: + * to_set(klass = Set, *args, &block) -> self or new_set + * + * Returns self if receiver is an instance of +Set+ and no arguments or + * block are given. Otherwise, converts the set to another with + * <tt>klass.new(self, *args, &block)</tt>. + * + * In subclasses, returns `klass.new(self, *args, &block)` unless overridden. + */ +static VALUE +set_i_to_set(int argc, VALUE *argv, VALUE set) +{ + VALUE klass; + + if (argc == 0) { + klass = rb_cSet; + argv = &set; + argc = 1; + } + else { + klass = argv[0]; + argv[0] = set; + } + + if (klass == rb_cSet && rb_obj_is_instance_of(set, rb_cSet) && + argc == 1 && !rb_block_given_p()) { + return set; + } + + return rb_funcall_passing_block(klass, id_new, argc, argv); +} + +/* + * call-seq: + * join(separator=nil)-> new_string + * + * Returns a string created by converting each element of the set to a string. + */ +static VALUE +set_i_join(int argc, VALUE *argv, VALUE set) +{ + rb_check_arity(argc, 0, 1); + return rb_ary_join(set_i_to_a(set), argc == 0 ? Qnil : argv[0]); +} + +/* + * call-seq: + * add(obj) -> self + * + * Adds the given object to the set and returns self. Use `merge` to + * add many elements at once. + * + * Set[1, 2].add(3) #=> #<Set: {1, 2, 3}> + * Set[1, 2].add([3, 4]) #=> #<Set: {1, 2, [3, 4]}> + * Set[1, 2].add(2) #=> #<Set: {1, 2}> + */ +static VALUE +set_i_add(VALUE set, VALUE item) +{ + rb_check_frozen(set); + if (set_iterating_p(set)) { + if (!set_lookup(RSET_TABLE(set), (st_data_t)item)) { + no_new_item(); + } + } + else { + set_insert_wb(set, item, NULL); + } + return set; +} + +/* + * call-seq: + * add?(obj) -> self or nil + * + * Adds the given object to the set and returns self. If the object is + * already in the set, returns nil. + * + * Set[1, 2].add?(3) #=> #<Set: {1, 2, 3}> + * Set[1, 2].add?([3, 4]) #=> #<Set: {1, 2, [3, 4]}> + * Set[1, 2].add?(2) #=> nil + */ +static VALUE +set_i_add_p(VALUE set, VALUE item) +{ + rb_check_frozen(set); + if (set_iterating_p(set)) { + if (!set_lookup(RSET_TABLE(set), (st_data_t)item)) { + no_new_item(); + } + return Qnil; + } + else { + return set_insert_wb(set, item, NULL) ? Qnil : set; + } +} + +/* + * call-seq: + * delete(obj) -> self + * + * Deletes the given object from the set and returns self. Use subtract + * to delete many items at once. + */ +static VALUE +set_i_delete(VALUE set, VALUE item) +{ + rb_check_frozen(set); + if (set_delete(RSET_TABLE(set), (st_data_t *)&item)) { + set_compact_after_delete(set); + } + return set; +} + +/* + * call-seq: + * delete?(obj) -> self or nil + * + * Deletes the given object from the set and returns self. If the + * object is not in the set, returns nil. + */ +static VALUE +set_i_delete_p(VALUE set, VALUE item) +{ + rb_check_frozen(set); + if (set_delete(RSET_TABLE(set), (st_data_t *)&item)) { + set_compact_after_delete(set); + return set; + } + return Qnil; +} + +static int +set_delete_if_i(st_data_t key, st_data_t dummy) +{ + return RTEST(rb_yield((VALUE)key)) ? ST_DELETE : ST_CONTINUE; +} + +/* + * call-seq: + * delete_if { |o| ... } -> self + * delete_if -> enumerator + * + * Deletes every element of the set for which block evaluates to + * true, and returns self. Returns an enumerator if no block is given. + */ +static VALUE +set_i_delete_if(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + rb_check_frozen(set); + set_iter(set, set_delete_if_i, 0); + set_compact_after_delete(set); + return set; +} + +/* + * call-seq: + * reject! { |o| ... } -> self + * reject! -> enumerator + * + * Equivalent to Set#delete_if, but returns nil if no changes were made. + * Returns an enumerator if no block is given. + */ +static VALUE +set_i_reject(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + rb_check_frozen(set); + + set_table *table = RSET_TABLE(set); + size_t n = set_table_size(table); + set_iter(set, set_delete_if_i, 0); + + if (n == set_table_size(table)) return Qnil; + + set_compact_after_delete(set); + return set; +} + +static int +set_classify_i(st_data_t key, st_data_t tmp) +{ + VALUE* args = (VALUE*)tmp; + VALUE hash = args[0]; + VALUE hash_key = rb_yield(key); + VALUE set = rb_hash_lookup2(hash, hash_key, Qundef); + if (set == Qundef) { + set = set_s_alloc(args[1]); + rb_hash_aset(hash, hash_key, set); + } + set_i_add(set, key); + + return ST_CONTINUE; +} + +/* + * call-seq: + * classify { |o| ... } -> hash + * classify -> enumerator + * + * Classifies the set by the return value of the given block and + * returns a hash of {value => set of elements} pairs. The block is + * called once for each element of the set, passing the element as + * parameter. + * + * files = Set.new(Dir.glob("*.rb")) + * hash = files.classify { |f| File.mtime(f).year } + * hash #=> {2000 => #<Set: {"a.rb", "b.rb"}>, + * # 2001 => #<Set: {"c.rb", "d.rb", "e.rb"}>, + * # 2002 => #<Set: {"f.rb"}>} + * + * Returns an enumerator if no block is given. + */ +static VALUE +set_i_classify(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + VALUE args[2]; + args[0] = rb_hash_new(); + args[1] = rb_obj_class(set); + set_iter(set, set_classify_i, (st_data_t)args); + return args[0]; +} + +struct set_divide_args { + VALUE self; + VALUE set_class; + VALUE final_set; + VALUE hash; + VALUE current_set; + VALUE current_item; + unsigned long ni; + unsigned long nj; +}; + +static VALUE +set_divide_block0(RB_BLOCK_CALL_FUNC_ARGLIST(j, arg)) +{ + struct set_divide_args *args = (struct set_divide_args *)arg; + if (args->nj > args->ni) { + VALUE i = args->current_item; + if (RTEST(rb_yield_values(2, i, j)) && RTEST(rb_yield_values(2, j, i))) { + VALUE hash = args->hash; + if (args->current_set == Qnil) { + VALUE set = rb_hash_aref(hash, j); + if (set == Qnil) { + VALUE both[2] = {i, j}; + set = set_s_create(2, both, args->set_class); + rb_hash_aset(hash, i, set); + rb_hash_aset(hash, j, set); + set_i_add(args->final_set, set); + } + else { + set_i_add(set, i); + rb_hash_aset(hash, i, set); + } + args->current_set = set; + } + else { + set_i_add(args->current_set, j); + rb_hash_aset(hash, j, args->current_set); + } + } + } + args->nj++; + return j; +} + +static VALUE +set_divide_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, arg)) +{ + struct set_divide_args *args = (struct set_divide_args *)arg; + VALUE hash = args->hash; + args->current_set = rb_hash_aref(hash, i); + args->current_item = i; + args->nj = 0; + rb_block_call(args->self, id_each, 0, 0, set_divide_block0, arg); + if (args->current_set == Qnil) { + VALUE set = set_s_create(1, &i, args->set_class); + rb_hash_aset(hash, i, set); + set_i_add(args->final_set, set); + } + args->ni++; + return i; +} + +static void set_merge_enum_into(VALUE set, VALUE arg); + +/* + * call-seq: + * divide { |o1, o2| ... } -> set + * divide { |o| ... } -> set + * divide -> enumerator + * + * Divides the set into a set of subsets according to the commonality + * defined by the given block. + * + * If the arity of the block is 2, elements o1 and o2 are in common + * if both block.call(o1, o2) and block.call(o2, o1) are true. + * Otherwise, elements o1 and o2 are in common if + * block.call(o1) == block.call(o2). + * + * numbers = Set[1, 3, 4, 6, 9, 10, 11] + * set = numbers.divide { |i,j| (i - j).abs == 1 } + * set #=> #<Set: {#<Set: {1}>, + * # #<Set: {3, 4}>, + * # #<Set: {6}>}> + * # #<Set: {9, 10, 11}>, + * + * Returns an enumerator if no block is given. + */ +static VALUE +set_i_divide(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + + if (rb_block_arity() == 2) { + VALUE final_set = set_s_create(0, 0, rb_cSet); + struct set_divide_args args = { + .self = set, + .set_class = rb_obj_class(set), + .final_set = final_set, + .hash = rb_hash_new(), + .current_set = 0, + .current_item = 0, + .ni = 0, + .nj = 0 + }; + rb_block_call(set, id_each, 0, 0, set_divide_block, (VALUE)&args); + return final_set; + } + + VALUE values = rb_hash_values(set_i_classify(set)); + set = set_alloc_with_size(rb_cSet, RARRAY_LEN(values)); + set_merge_enum_into(set, values); + return set; +} + +static int +set_clear_i(st_data_t key, st_data_t dummy) +{ + return ST_DELETE; +} + +/* + * call-seq: + * clear -> self + * + * Removes all elements and returns self. + * + * set = Set[1, 'c', :s] #=> #<Set: {1, "c", :s}> + * set.clear #=> #<Set: {}> + * set #=> #<Set: {}> + */ +static VALUE +set_i_clear(VALUE set) +{ + rb_check_frozen(set); + if (RSET_SIZE(set) == 0) return set; + if (set_iterating_p(set)) { + set_iter(set, set_clear_i, 0); + } + else { + set_clear(RSET_TABLE(set)); + set_compact_after_delete(set); + } + return set; +} + +struct set_intersection_data { + VALUE set; + set_table *into; + set_table *other; +}; + +static int +set_intersection_i(st_data_t key, st_data_t tmp) +{ + struct set_intersection_data *data = (struct set_intersection_data *)tmp; + if (set_lookup(data->other, key)) { + set_table_insert_wb(data->into, data->set, key, NULL); + } + + return ST_CONTINUE; +} + +static VALUE +set_intersection_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, data)) +{ + set_intersection_i((st_data_t)i, (st_data_t)data); + return i; +} + +/* + * call-seq: + * set & enum -> new_set + * + * Returns a new set containing elements common to the set and the given + * enumerable object. + * + * Set[1, 3, 5] & Set[3, 2, 1] #=> #<Set: {3, 1}> + * Set['a', 'b', 'z'] & ['a', 'b', 'c'] #=> #<Set: {"a", "b"}> + */ +static VALUE +set_i_intersection(VALUE set, VALUE other) +{ + VALUE new_set = set_s_alloc(rb_obj_class(set)); + set_table *stable = RSET_TABLE(set); + set_table *ntable = RSET_TABLE(new_set); + + if (rb_obj_is_kind_of(other, rb_cSet)) { + set_table *otable = RSET_TABLE(other); + if (set_table_size(stable) >= set_table_size(otable)) { + /* Swap so we iterate over the smaller set */ + otable = stable; + set = other; + } + + struct set_intersection_data data = { + .set = new_set, + .into = ntable, + .other = otable + }; + set_iter(set, set_intersection_i, (st_data_t)&data); + } + else { + struct set_intersection_data data = { + .set = new_set, + .into = ntable, + .other = stable + }; + rb_block_call(other, enum_method_id(other), 0, 0, set_intersection_block, (VALUE)&data); + } + + return new_set; +} + +/* + * call-seq: + * include?(item) -> true or false + * + * Returns true if the set contains the given object: + * + * Set[1, 2, 3].include? 2 #=> true + * Set[1, 2, 3].include? 4 #=> false + * + * Note that <code>include?</code> and <code>member?</code> do not test member + * equality using <code>==</code> as do other Enumerables. + * + * This is aliased to #===, so it is usable in +case+ expressions: + * + * case :apple + * when Set[:potato, :carrot] + * "vegetable" + * when Set[:apple, :banana] + * "fruit" + * end + * # => "fruit" + * + * See also Enumerable#include? + */ +static VALUE +set_i_include(VALUE set, VALUE item) +{ + return RBOOL(RSET_IS_MEMBER(set, item)); +} + +struct set_merge_args { + VALUE set; + set_table *into; +}; + +static int +set_merge_i(st_data_t key, st_data_t data) +{ + struct set_merge_args *args = (struct set_merge_args *)data; + set_table_insert_wb(args->into, args->set, key, NULL); + return ST_CONTINUE; +} + +static VALUE +set_merge_block(RB_BLOCK_CALL_FUNC_ARGLIST(key, set)) +{ + VALUE element = key; + set_insert_wb(set, element, &element); + return element; +} + +static void +set_merge_enum_into(VALUE set, VALUE arg) +{ + if (rb_obj_is_kind_of(arg, rb_cSet)) { + struct set_merge_args args = { + .set = set, + .into = RSET_TABLE(set) + }; + set_iter(arg, set_merge_i, (st_data_t)&args); + } + else if (RB_TYPE_P(arg, T_ARRAY)) { + long len = RARRAY_LEN(arg); + if (RARRAY_LEN(arg) != 0) { + set_table *into = RSET_TABLE(set); + RARRAY_PTR_USE(arg, ptr, { + for(; len > 0; len--, ptr++) { + set_table_insert_wb(into, set, *ptr, NULL); + } + }); + } + } + else { + rb_block_call(arg, enum_method_id(arg), 0, 0, set_merge_block, (VALUE)set); + } +} + +/* + * call-seq: + * merge(*enums, **nil) -> self + * + * Merges the elements of the given enumerable objects to the set and + * returns self. + */ +static VALUE +set_i_merge(int argc, VALUE *argv, VALUE set) +{ + if (rb_keyword_given_p()) { + rb_raise(rb_eArgError, "no keywords accepted"); + } + rb_check_frozen(set); + + int i; + + for (i=0; i < argc; i++) { + set_merge_enum_into(set, argv[i]); + } + + return set; +} + +static VALUE +set_reset_table_with_type(VALUE set, const struct st_hash_type *type) +{ + rb_check_frozen(set); + + struct set_object *sobj; + TypedData_Get_Struct(set, struct set_object, &set_data_type, sobj); + set_table *old = &sobj->table; + + size_t size = set_table_size(old); + if (size > 0) { + set_table *new = set_init_table_with_size(NULL, type, size); + struct set_merge_args args = { + .set = set, + .into = new + }; + set_iter(set, set_merge_i, (st_data_t)&args); + set_free_embedded(sobj); + memcpy(&sobj->table, new, sizeof(*new)); + free(new); + } + else { + sobj->table.type = type; + } + + return set; +} + +/* + * call-seq: + * compare_by_identity -> self + * + * Makes the set compare its elements by their identity and returns self. + */ +static VALUE +set_i_compare_by_identity(VALUE set) +{ + if (RSET_COMPARE_BY_IDENTITY(set)) return set; + + if (set_iterating_p(set)) { + rb_raise(rb_eRuntimeError, "compare_by_identity during iteration"); + } + + return set_reset_table_with_type(set, &identhash); +} + +/* + * call-seq: + * compare_by_identity? -> true or false + * + * Returns true if the set will compare its elements by their + * identity. Also see Set#compare_by_identity. + */ +static VALUE +set_i_compare_by_identity_p(VALUE set) +{ + return RBOOL(RSET_COMPARE_BY_IDENTITY(set)); +} + +/* + * call-seq: + * size -> integer + * + * Returns the number of elements. + */ +static VALUE +set_i_size(VALUE set) +{ + return RSET_SIZE_NUM(set); +} + +/* + * call-seq: + * empty? -> true or false + * + * Returns true if the set contains no elements. + */ +static VALUE +set_i_empty(VALUE set) +{ + return RBOOL(RSET_EMPTY(set)); +} + +static int +set_xor_i(st_data_t key, st_data_t data) +{ + VALUE element = (VALUE)key; + VALUE set = (VALUE)data; + set_table *table = RSET_TABLE(set); + if (set_table_insert_wb(table, set, element, &element)) { + set_delete(table, &element); + } + return ST_CONTINUE; +} + +/* + * call-seq: + * set ^ enum -> new_set + * + * Returns a new set containing elements exclusive between the set and the + * given enumerable object. <tt>(set ^ enum)</tt> is equivalent to + * <tt>((set | enum) - (set & enum))</tt>. + * + * Set[1, 2] ^ Set[2, 3] #=> #<Set: {3, 1}> + * Set[1, 'b', 'c'] ^ ['b', 'd'] #=> #<Set: {"d", 1, "c"}> + */ +static VALUE +set_i_xor(VALUE set, VALUE other) +{ + VALUE new_set; + if (rb_obj_is_kind_of(other, rb_cSet)) { + new_set = other; + } + else { + new_set = set_s_alloc(rb_obj_class(set)); + set_merge_enum_into(new_set, other); + } + set_iter(set, set_xor_i, (st_data_t)new_set); + return new_set; +} + +/* + * call-seq: + * set | enum -> new_set + * + * Returns a new set built by merging the set and the elements of the + * given enumerable object. + * + * Set[1, 2, 3] | Set[2, 4, 5] #=> #<Set: {1, 2, 3, 4, 5}> + * Set[1, 5, 'z'] | (1..6) #=> #<Set: {1, 5, "z", 2, 3, 4, 6}> + */ +static VALUE +set_i_union(VALUE set, VALUE other) +{ + set = rb_obj_dup(set); + set_merge_enum_into(set, other); + return set; +} + +static int +set_remove_i(st_data_t key, st_data_t from) +{ + set_delete((struct set_table *)from, (st_data_t *)&key); + return ST_CONTINUE; +} + +static VALUE +set_remove_block(RB_BLOCK_CALL_FUNC_ARGLIST(key, set)) +{ + rb_check_frozen(set); + set_delete(RSET_TABLE(set), (st_data_t *)&key); + return key; +} + +static void +set_remove_enum_from(VALUE set, VALUE arg) +{ + if (rb_obj_is_kind_of(arg, rb_cSet)) { + set_iter(arg, set_remove_i, (st_data_t)RSET_TABLE(set)); + } + else { + rb_block_call(arg, enum_method_id(arg), 0, 0, set_remove_block, (VALUE)set); + } +} + +/* + * call-seq: + * subtract(enum) -> self + * + * Deletes every element that appears in the given enumerable object + * and returns self. + */ +static VALUE +set_i_subtract(VALUE set, VALUE other) +{ + rb_check_frozen(set); + set_remove_enum_from(set, other); + return set; +} + +/* + * call-seq: + * set - enum -> new_set + * + * Returns a new set built by duplicating the set, removing every + * element that appears in the given enumerable object. + * + * Set[1, 3, 5] - Set[1, 5] #=> #<Set: {3}> + * Set['a', 'b', 'z'] - ['a', 'c'] #=> #<Set: {"b", "z"}> + */ +static VALUE +set_i_difference(VALUE set, VALUE other) +{ + return set_i_subtract(rb_obj_dup(set), other); +} + +static int +set_each_i(st_data_t key, st_data_t dummy) +{ + rb_yield(key); + return ST_CONTINUE; +} + +/* + * call-seq: + * each { |o| ... } -> self + * each -> enumerator + * + * Calls the given block once for each element in the set, passing + * the element as parameter. Returns an enumerator if no block is + * given. + */ +static VALUE +set_i_each(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + set_iter(set, set_each_i, 0); + return set; +} + +static int +set_collect_i(st_data_t key, st_data_t data) +{ + set_insert_wb((VALUE)data, rb_yield((VALUE)key), NULL); + return ST_CONTINUE; +} + +/* + * call-seq: + * collect! { |o| ... } -> self + * collect! -> enumerator + * + * Replaces the elements with ones returned by +collect+. + * Returns an enumerator if no block is given. + */ +static VALUE +set_i_collect(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + rb_check_frozen(set); + + VALUE new_set = set_s_alloc(rb_obj_class(set)); + set_iter(set, set_collect_i, (st_data_t)new_set); + set_i_initialize_copy(set, new_set); + + return set; +} + +static int +set_keep_if_i(st_data_t key, st_data_t into) +{ + if (!RTEST(rb_yield((VALUE)key))) { + set_delete((set_table *)into, &key); + } + return ST_CONTINUE; +} + +/* + * call-seq: + * keep_if { |o| ... } -> self + * keep_if -> enumerator + * + * Deletes every element of the set for which block evaluates to false, and + * returns self. Returns an enumerator if no block is given. + */ +static VALUE +set_i_keep_if(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + rb_check_frozen(set); + + set_iter(set, set_keep_if_i, (st_data_t)RSET_TABLE(set)); + + return set; +} + +/* + * call-seq: + * select! { |o| ... } -> self + * select! -> enumerator + * + * Equivalent to Set#keep_if, but returns nil if no changes were made. + * Returns an enumerator if no block is given. + */ +static VALUE +set_i_select(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + rb_check_frozen(set); + + set_table *table = RSET_TABLE(set); + size_t n = set_table_size(table); + set_iter(set, set_keep_if_i, (st_data_t)table); + + return (n == set_table_size(table)) ? Qnil : set; +} + +/* + * call-seq: + * replace(enum) -> self + * + * Replaces the contents of the set with the contents of the given + * enumerable object and returns self. + * + * set = Set[1, 'c', :s] #=> #<Set: {1, "c", :s}> + * set.replace([1, 2]) #=> #<Set: {1, 2}> + * set #=> #<Set: {1, 2}> + */ +static VALUE +set_i_replace(VALUE set, VALUE other) +{ + rb_check_frozen(set); + + if (rb_obj_is_kind_of(other, rb_cSet)) { + set_i_initialize_copy(set, other); + } + else { + if (set_iterating_p(set)) { + rb_raise(rb_eRuntimeError, "cannot replace set during iteration"); + } + + // make sure enum is enumerable before calling clear + enum_method_id(other); + + set_clear(RSET_TABLE(set)); + set_merge_enum_into(set, other); + } + + return set; +} + +/* + * call-seq: + * reset -> self + * + * Resets the internal state after modification to existing elements + * and returns self. Elements will be reindexed and deduplicated. + */ +static VALUE +set_i_reset(VALUE set) +{ + if (set_iterating_p(set)) { + rb_raise(rb_eRuntimeError, "reset during iteration"); + } + + return set_reset_table_with_type(set, RSET_TABLE(set)->type); +} + +static void set_flatten_merge(VALUE set, VALUE from, VALUE seen); + +static int +set_flatten_merge_i(st_data_t item, st_data_t arg) +{ + VALUE *args = (VALUE *)arg; + VALUE set = args[0]; + if (rb_obj_is_kind_of(item, rb_cSet)) { + VALUE e_id = rb_obj_id(item); + VALUE hash = args[2]; + switch(rb_hash_aref(hash, e_id)) { + case Qfalse: + return ST_CONTINUE; + case Qtrue: + rb_raise(rb_eArgError, "tried to flatten recursive Set"); + default: + break; + } + + rb_hash_aset(hash, e_id, Qtrue); + set_flatten_merge(set, item, hash); + rb_hash_aset(hash, e_id, Qfalse); + } + else { + set_i_add(set, item); + } + return ST_CONTINUE; +} + +static void +set_flatten_merge(VALUE set, VALUE from, VALUE hash) +{ + VALUE args[3] = {set, from, hash}; + set_iter(from, set_flatten_merge_i, (st_data_t)args); +} + +/* + * call-seq: + * flatten -> set + * + * Returns a new set that is a copy of the set, flattening each + * containing set recursively. + */ +static VALUE +set_i_flatten(VALUE set) +{ + VALUE new_set = set_s_alloc(rb_obj_class(set)); + set_flatten_merge(new_set, set, rb_hash_new()); + return new_set; +} + +static int +set_contains_set_i(st_data_t item, st_data_t arg) +{ + if (rb_obj_is_kind_of(item, rb_cSet)) { + *(bool *)arg = true; + return ST_STOP; + } + return ST_CONTINUE; +} + +/* + * call-seq: + * flatten! -> self + * + * Equivalent to Set#flatten, but replaces the receiver with the + * result in place. Returns nil if no modifications were made. + */ +static VALUE +set_i_flatten_bang(VALUE set) +{ + bool contains_set = false; + set_iter(set, set_contains_set_i, (st_data_t)&contains_set); + if (!contains_set) return Qnil; + rb_check_frozen(set); + return set_i_replace(set, set_i_flatten(set)); +} + +struct set_subset_data { + set_table *table; + VALUE result; +}; + +static int +set_le_i(st_data_t key, st_data_t arg) +{ + struct set_subset_data *data = (struct set_subset_data *)arg; + if (set_lookup(data->table, key)) return ST_CONTINUE; + data->result = Qfalse; + return ST_STOP; +} + +static VALUE +set_le(VALUE set, VALUE other) +{ + struct set_subset_data data = { + .table = RSET_TABLE(other), + .result = Qtrue + }; + set_iter(set, set_le_i, (st_data_t)&data); + return data.result; +} + +/* + * call-seq: + * proper_subset?(set) -> true or false + * + * Returns true if the set is a proper subset of the given set. + */ +static VALUE +set_i_proper_subset(VALUE set, VALUE other) +{ + check_set(other); + if (RSET_SIZE(set) >= RSET_SIZE(other)) return Qfalse; + return set_le(set, other); +} + +/* + * call-seq: + * subset?(set) -> true or false + * + * Returns true if the set is a subset of the given set. + */ +static VALUE +set_i_subset(VALUE set, VALUE other) +{ + check_set(other); + if (RSET_SIZE(set) > RSET_SIZE(other)) return Qfalse; + return set_le(set, other); +} + +/* + * call-seq: + * proper_superset?(set) -> true or false + * + * Returns true if the set is a proper superset of the given set. + */ +static VALUE +set_i_proper_superset(VALUE set, VALUE other) +{ + check_set(other); + if (RSET_SIZE(set) <= RSET_SIZE(other)) return Qfalse; + return set_le(other, set); +} + +/* + * call-seq: + * superset?(set) -> true or false + * + * Returns true if the set is a superset of the given set. + */ +static VALUE +set_i_superset(VALUE set, VALUE other) +{ + check_set(other); + if (RSET_SIZE(set) < RSET_SIZE(other)) return Qfalse; + return set_le(other, set); +} + +static int +set_intersect_i(st_data_t key, st_data_t arg) +{ + VALUE *args = (VALUE *)arg; + if (set_lookup((set_table *)args[0], key)) { + args[1] = Qtrue; + return ST_STOP; + } + return ST_CONTINUE; +} + +/* + * call-seq: + * intersect?(set) -> true or false + * + * Returns true if the set and the given enumerable have at least one + * element in common. + * + * Set[1, 2, 3].intersect? Set[4, 5] #=> false + * Set[1, 2, 3].intersect? Set[3, 4] #=> true + * Set[1, 2, 3].intersect? 4..5 #=> false + * Set[1, 2, 3].intersect? [3, 4] #=> true + */ +static VALUE +set_i_intersect(VALUE set, VALUE other) +{ + if (rb_obj_is_kind_of(other, rb_cSet)) { + size_t set_size = RSET_SIZE(set); + size_t other_size = RSET_SIZE(other); + VALUE args[2]; + args[1] = Qfalse; + VALUE iter_arg; + + if (set_size < other_size) { + iter_arg = set; + args[0] = (VALUE)RSET_TABLE(other); + } + else { + iter_arg = other; + args[0] = (VALUE)RSET_TABLE(set); + } + set_iter(iter_arg, set_intersect_i, (st_data_t)args); + return args[1]; + } + else if (rb_obj_is_kind_of(other, rb_mEnumerable)) { + return rb_funcall(other, id_any_p, 1, set); + } + else { + rb_raise(rb_eArgError, "value must be enumerable"); + } +} + +/* + * call-seq: + * disjoint?(set) -> true or false + * + * Returns true if the set and the given enumerable have no + * element in common. This method is the opposite of +intersect?+. + * + * Set[1, 2, 3].disjoint? Set[3, 4] #=> false + * Set[1, 2, 3].disjoint? Set[4, 5] #=> true + * Set[1, 2, 3].disjoint? [3, 4] #=> false + * Set[1, 2, 3].disjoint? 4..5 #=> true + */ +static VALUE +set_i_disjoint(VALUE set, VALUE other) +{ + return RBOOL(!RTEST(set_i_intersect(set, other))); +} + +/* + * call-seq: + * set <=> other -> -1, 0, 1, or nil + * + * Returns 0 if the set are equal, -1 / 1 if the set is a + * proper subset / superset of the given set, or or nil if + * they both have unique elements. + */ +static VALUE +set_i_compare(VALUE set, VALUE other) +{ + if (rb_obj_is_kind_of(other, rb_cSet)) { + size_t set_size = RSET_SIZE(set); + size_t other_size = RSET_SIZE(other); + + if (set_size < other_size) { + if (set_le(set, other) == Qtrue) { + return INT2NUM(-1); + } + } + else if (set_size > other_size) { + if (set_le(other, set) == Qtrue) { + return INT2NUM(1); + } + } + else if (set_le(set, other) == Qtrue) { + return INT2NUM(0); + } + } + + return Qnil; +} + +struct set_equal_data { + VALUE result; + VALUE set; +}; + +static int +set_eql_i(st_data_t item, st_data_t arg) +{ + struct set_equal_data *data = (struct set_equal_data *)arg; + + if (!set_lookup(RSET_TABLE(data->set), item)) { + data->result = Qfalse; + return ST_STOP; + } + return ST_CONTINUE; +} + +static VALUE +set_recursive_eql(VALUE set, VALUE dt, int recur) +{ + if (recur) return Qtrue; + struct set_equal_data *data = (struct set_equal_data*)dt; + data->result = Qtrue; + set_iter(set, set_eql_i, dt); + return data->result; +} + +/* + * call-seq: + * set == other -> true or false + * + * Returns true if two sets are equal. + */ +static VALUE +set_i_eq(VALUE set, VALUE other) +{ + if (!rb_obj_is_kind_of(other, rb_cSet)) return Qfalse; + if (set == other) return Qtrue; + + set_table *stable = RSET_TABLE(set); + set_table *otable = RSET_TABLE(other); + size_t ssize = set_table_size(stable); + size_t osize = set_table_size(otable); + + if (ssize != osize) return Qfalse; + if (ssize == 0 && osize == 0) return Qtrue; + if (stable->type != otable->type) return Qfalse; + + struct set_equal_data data; + data.set = other; + return rb_exec_recursive_paired(set_recursive_eql, set, other, (VALUE)&data); +} + +static int +set_hash_i(st_data_t item, st_data_t(arg)) +{ + st_index_t *hval = (st_index_t *)arg; + st_index_t ival = rb_hash(item); + *hval ^= rb_st_hash(&ival, sizeof(st_index_t), 0); + return ST_CONTINUE; +} + +/* + * call-seq: + * hash -> integer + * + * Returns hash code for set. + */ +static VALUE +set_i_hash(VALUE set) +{ + st_index_t size = RSET_SIZE(set); + st_index_t hval = rb_st_hash_start(size); + hval = rb_hash_uint(hval, (st_index_t)set_i_hash); + if (size) { + set_iter(set, set_hash_i, (VALUE)&hval); + } + hval = rb_st_hash_end(hval); + return ST2FIX(hval); +} + +/* + * Document-class: Set + * + * Copyright (c) 2002-2024 Akinori MUSHA <[email protected]> + * + * Documentation by Akinori MUSHA and Gavin Sinclair. + * + * All rights reserved. You can redistribute and/or modify it under the same + * terms as Ruby. + * + * The Set class implements a collection of unordered values with no + * duplicates. It is a hybrid of Array's intuitive inter-operation + * facilities and Hash's fast lookup. + * + * Set is easy to use with Enumerable objects (implementing `each`). + * Most of the initializer methods and binary operators accept generic + * Enumerable objects besides sets and arrays. An Enumerable object + * can be converted to Set using the `to_set` method. + * + * Set uses a data structure similar to Hash for storage, except that + * it only has keys and no values. + * + * * Equality of elements is determined according to Object#eql? and + * Object#hash. Use Set#compare_by_identity to make a set compare + * its elements by their identity. + * * Set assumes that the identity of each element does not change + * while it is stored. Modifying an element of a set will render the + * set to an unreliable state. + * * When a string is to be stored, a frozen copy of the string is + * stored instead unless the original string is already frozen. + * + * == Comparison + * + * The comparison operators <tt><</tt>, <tt>></tt>, <tt><=</tt>, and + * <tt>>=</tt> are implemented as shorthand for the + * {proper_,}{subset?,superset?} methods. The <tt><=></tt> + * operator reflects this order, or returns +nil+ for sets that both + * have distinct elements (<tt>{x, y}</tt> vs. <tt>{x, z}</tt> for example). + * + * == Example + * + * s1 = Set[1, 2] #=> #<Set: {1, 2}> + * s2 = [1, 2].to_set #=> #<Set: {1, 2}> + * s1 == s2 #=> true + * s1.add("foo") #=> #<Set: {1, 2, "foo"}> + * s1.merge([2, 6]) #=> #<Set: {1, 2, "foo", 6}> + * s1.subset?(s2) #=> false + * s2.subset?(s1) #=> true + * + * == Contact + * + * - Akinori MUSHA <[email protected]> (current maintainer) + * + * == What's Here + * + * First, what's elsewhere. \Class \Set: + * + * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * which provides dozens of additional methods. + * + * In particular, class \Set does not have many methods of its own + * for fetching or for iterating. + * Instead, it relies on those in \Enumerable. + * + * Here, class \Set provides methods that are useful for: + * + * - {Creating an Array}[rdoc-ref:Array@Methods+for+Creating+an+Array] + * - {Creating a Set}[rdoc-ref:Array@Methods+for+Creating+a+Set] + * - {Set Operations}[rdoc-ref:Array@Methods+for+Set+Operations] + * - {Comparing}[rdoc-ref:Array@Methods+for+Comparing] + * - {Querying}[rdoc-ref:Array@Methods+for+Querying] + * - {Assigning}[rdoc-ref:Array@Methods+for+Assigning] + * - {Deleting}[rdoc-ref:Array@Methods+for+Deleting] + * - {Converting}[rdoc-ref:Array@Methods+for+Converting] + * - {Iterating}[rdoc-ref:Array@Methods+for+Iterating] + * - {And more....}[rdoc-ref:Array@Other+Methods] + * + * === Methods for Creating a \Set + * + * - ::[]: + * Returns a new set containing the given objects. + * - ::new: + * Returns a new set containing either the given objects + * (if no block given) or the return values from the called block + * (if a block given). + * + * === Methods for \Set Operations + * + * - #| (aliased as #union and #+): + * Returns a new set containing all elements from +self+ + * and all elements from a given enumerable (no duplicates). + * - #& (aliased as #intersection): + * Returns a new set containing all elements common to +self+ + * and a given enumerable. + * - #- (aliased as #difference): + * Returns a copy of +self+ with all elements + * in a given enumerable removed. + * - #^: Returns a new set containing all elements from +self+ + * and a given enumerable except those common to both. + * + * === Methods for Comparing + * + * - #<=>: Returns -1, 0, or 1 as +self+ is less than, equal to, + * or greater than a given object. + * - #==: Returns whether +self+ and a given enumerable are equal, + * as determined by Object#eql?. + * - #compare_by_identity?: + * Returns whether the set considers only identity + * when comparing elements. + * + * === Methods for Querying + * + * - #length (aliased as #size): + * Returns the count of elements. + * - #empty?: + * Returns whether the set has no elements. + * - #include? (aliased as #member? and #===): + * Returns whether a given object is an element in the set. + * - #subset? (aliased as #<=): + * Returns whether a given object is a subset of the set. + * - #proper_subset? (aliased as #<): + * Returns whether a given enumerable is a proper subset of the set. + * - #superset? (aliased as #>=): + * Returns whether a given enumerable is a superset of the set. + * - #proper_superset? (aliased as #>): + * Returns whether a given enumerable is a proper superset of the set. + * - #disjoint?: + * Returns +true+ if the set and a given enumerable + * have no common elements, +false+ otherwise. + * - #intersect?: + * Returns +true+ if the set and a given enumerable: + * have any common elements, +false+ otherwise. + * - #compare_by_identity?: + * Returns whether the set considers only identity + * when comparing elements. + * + * === Methods for Assigning + * + * - #add (aliased as #<<): + * Adds a given object to the set; returns +self+. + * - #add?: + * If the given object is not an element in the set, + * adds it and returns +self+; otherwise, returns +nil+. + * - #merge: + * Merges the elements of each given enumerable object to the set; returns +self+. + * - #replace: + * Replaces the contents of the set with the contents + * of a given enumerable. + * + * === Methods for Deleting + * + * - #clear: + * Removes all elements in the set; returns +self+. + * - #delete: + * Removes a given object from the set; returns +self+. + * - #delete?: + * If the given object is an element in the set, + * removes it and returns +self+; otherwise, returns +nil+. + * - #subtract: + * Removes each given object from the set; returns +self+. + * - #delete_if - Removes elements specified by a given block. + * - #select! (aliased as #filter!): + * Removes elements not specified by a given block. + * - #keep_if: + * Removes elements not specified by a given block. + * - #reject! + * Removes elements specified by a given block. + * + * === Methods for Converting + * + * - #classify: + * Returns a hash that classifies the elements, + * as determined by the given block. + * - #collect! (aliased as #map!): + * Replaces each element with a block return-value. + * - #divide: + * Returns a hash that classifies the elements, + * as determined by the given block; + * differs from #classify in that the block may accept + * either one or two arguments. + * - #flatten: + * Returns a new set that is a recursive flattening of +self+. + * - #flatten!: + * Replaces each nested set in +self+ with the elements from that set. + * - #inspect (aliased as #to_s): + * Returns a string displaying the elements. + * - #join: + * Returns a string containing all elements, converted to strings + * as needed, and joined by the given record separator. + * - #to_a: + * Returns an array containing all set elements. + * - #to_set: + * Returns +self+ if given no arguments and no block; + * with a block given, returns a new set consisting of block + * return values. + * + * === Methods for Iterating + * + * - #each: + * Calls the block with each successive element; returns +self+. + * + * === Other Methods + * + * - #reset: + * Resets the internal state; useful if an object + * has been modified while an element in the set. + * + */ +void +Init_Set(void) +{ + rb_cSet = rb_define_class("Set", rb_cObject); + rb_include_module(rb_cSet, rb_mEnumerable); + + id_each_entry = rb_intern_const("each_entry"); + id_any_p = rb_intern_const("any?"); + id_new = rb_intern_const("new"); + id_set_iter_lev = rb_make_internal_id(); + + rb_define_alloc_func(rb_cSet, set_s_alloc); + rb_define_singleton_method(rb_cSet, "[]", set_s_create, -1); + + rb_define_method(rb_cSet, "initialize", set_i_initialize, -1); + rb_define_method(rb_cSet, "initialize_copy", set_i_initialize_copy, 1); + + rb_define_method(rb_cSet, "&", set_i_intersection, 1); + rb_define_alias(rb_cSet, "intersection", "&"); + rb_define_method(rb_cSet, "-", set_i_difference, 1); + rb_define_alias(rb_cSet, "difference", "-"); + rb_define_method(rb_cSet, "^", set_i_xor, 1); + rb_define_method(rb_cSet, "|", set_i_union, 1); + rb_define_alias(rb_cSet, "+", "|"); + rb_define_alias(rb_cSet, "union", "|"); + rb_define_method(rb_cSet, "<=>", set_i_compare, 1); + rb_define_method(rb_cSet, "==", set_i_eq, 1); + rb_define_alias(rb_cSet, "eql?", "=="); + rb_define_method(rb_cSet, "add", set_i_add, 1); + rb_define_alias(rb_cSet, "<<", "add"); + rb_define_method(rb_cSet, "add?", set_i_add_p, 1); + rb_define_method(rb_cSet, "classify", set_i_classify, 0); + rb_define_method(rb_cSet, "clear", set_i_clear, 0); + rb_define_method(rb_cSet, "collect!", set_i_collect, 0); + rb_define_alias(rb_cSet, "map!", "collect!"); + rb_define_method(rb_cSet, "compare_by_identity", set_i_compare_by_identity, 0); + rb_define_method(rb_cSet, "compare_by_identity?", set_i_compare_by_identity_p, 0); + rb_define_method(rb_cSet, "delete", set_i_delete, 1); + rb_define_method(rb_cSet, "delete?", set_i_delete_p, 1); + rb_define_method(rb_cSet, "delete_if", set_i_delete_if, 0); + rb_define_method(rb_cSet, "disjoint?", set_i_disjoint, 1); + rb_define_method(rb_cSet, "divide", set_i_divide, 0); + rb_define_method(rb_cSet, "each", set_i_each, 0); + rb_define_method(rb_cSet, "empty?", set_i_empty, 0); + rb_define_method(rb_cSet, "flatten", set_i_flatten, 0); + rb_define_method(rb_cSet, "flatten!", set_i_flatten_bang, 0); + rb_define_method(rb_cSet, "hash", set_i_hash, 0); + rb_define_method(rb_cSet, "include?", set_i_include, 1); + rb_define_alias(rb_cSet, "member?", "include?"); + rb_define_alias(rb_cSet, "===", "include?"); + rb_define_method(rb_cSet, "inspect", set_i_inspect, 0); + rb_define_alias(rb_cSet, "to_s", "inspect"); + rb_define_method(rb_cSet, "intersect?", set_i_intersect, 1); + rb_define_method(rb_cSet, "join", set_i_join, -1); + rb_define_method(rb_cSet, "keep_if", set_i_keep_if, 0); + rb_define_method(rb_cSet, "merge", set_i_merge, -1); + rb_define_method(rb_cSet, "proper_subset?", set_i_proper_subset, 1); + rb_define_alias(rb_cSet, "<", "proper_subset?"); + rb_define_method(rb_cSet, "proper_superset?", set_i_proper_superset, 1); + rb_define_alias(rb_cSet, ">", "proper_superset?"); + rb_define_method(rb_cSet, "reject!", set_i_reject, 0); + rb_define_method(rb_cSet, "replace", set_i_replace, 1); + rb_define_method(rb_cSet, "reset", set_i_reset, 0); + rb_define_method(rb_cSet, "size", set_i_size, 0); + rb_define_alias(rb_cSet, "length", "size"); + rb_define_method(rb_cSet, "select!", set_i_select, 0); + rb_define_alias(rb_cSet, "filter!", "select!"); + rb_define_method(rb_cSet, "subset?", set_i_subset, 1); + rb_define_alias(rb_cSet, "<=", "subset?"); + rb_define_method(rb_cSet, "subtract", set_i_subtract, 1); + rb_define_method(rb_cSet, "superset?", set_i_superset, 1); + rb_define_alias(rb_cSet, ">=", "superset?"); + rb_define_method(rb_cSet, "to_a", set_i_to_a, 0); + rb_define_method(rb_cSet, "to_set", set_i_to_set, -1); + + rb_provide("set.rb"); +} |