summaryrefslogtreecommitdiff
path: root/set.c
diff options
context:
space:
mode:
Diffstat (limited to 'set.c')
-rw-r--r--set.c2138
1 files changed, 2138 insertions, 0 deletions
diff --git a/set.c b/set.c
new file mode 100644
index 0000000000..8250a972ad
--- /dev/null
+++ b/set.c
@@ -0,0 +1,2138 @@
+/* This implements sets using the same hash table implementation as in
+ st.c, but without a value for each hash entry. This results in the
+ same basic performance characteristics as when using an st table,
+ but uses 1/3 less memory.
+ */
+
+#include "id.h"
+#include "internal.h"
+#include "internal/bits.h"
+#include "internal/hash.h"
+#include "internal/proc.h"
+#include "internal/sanitizers.h"
+#include "internal/set_table.h"
+#include "internal/symbol.h"
+#include "internal/variable.h"
+#include "ruby_assert.h"
+
+#include <stdio.h>
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#include <string.h>
+
+#ifndef SET_DEBUG
+#define SET_DEBUG 0
+#endif
+
+#if SET_DEBUG
+#include "internal/gc.h"
+#endif
+
+static st_index_t
+dbl_to_index(double d)
+{
+ union {double d; st_index_t i;} u;
+ u.d = d;
+ return u.i;
+}
+
+static const uint64_t prime1 = ((uint64_t)0x2e0bb864 << 32) | 0xe9ea7df5;
+static const uint32_t prime2 = 0x830fcab9;
+
+static inline uint64_t
+mult_and_mix(uint64_t m1, uint64_t m2)
+{
+#if defined HAVE_UINT128_T
+ uint128_t r = (uint128_t) m1 * (uint128_t) m2;
+ return (uint64_t) (r >> 64) ^ (uint64_t) r;
+#else
+ uint64_t hm1 = m1 >> 32, hm2 = m2 >> 32;
+ uint64_t lm1 = m1, lm2 = m2;
+ uint64_t v64_128 = hm1 * hm2;
+ uint64_t v32_96 = hm1 * lm2 + lm1 * hm2;
+ uint64_t v1_32 = lm1 * lm2;
+
+ return (v64_128 + (v32_96 >> 32)) ^ ((v32_96 << 32) + v1_32);
+#endif
+}
+
+static inline uint64_t
+key64_hash(uint64_t key, uint32_t seed)
+{
+ return mult_and_mix(key + seed, prime1);
+}
+
+/* Should cast down the result for each purpose */
+#define set_index_hash(index) key64_hash(rb_hash_start(index), prime2)
+
+static st_index_t
+set_ident_hash(st_data_t n)
+{
+#ifdef USE_FLONUM /* RUBY */
+ /*
+ * - flonum (on 64-bit) is pathologically bad, mix the actual
+ * float value in, but do not use the float value as-is since
+ * many integers get interpreted as 2.0 or -2.0 [Bug #10761]
+ */
+ if (FLONUM_P(n)) {
+ n ^= dbl_to_index(rb_float_value(n));
+ }
+#endif
+
+ return (st_index_t)set_index_hash((st_index_t)n);
+}
+
+static const struct st_hash_type identhash = {
+ rb_st_numcmp,
+ set_ident_hash,
+};
+
+static const struct st_hash_type objhash = {
+ rb_any_cmp,
+ rb_any_hash,
+};
+
+VALUE rb_cSet;
+
+#define id_each idEach
+static ID id_each_entry;
+static ID id_any_p;
+static ID id_new;
+static ID id_set_iter_lev;
+
+#define RSET_INITIALIZED FL_USER1
+#define RSET_LEV_MASK (FL_USER13 | FL_USER14 | FL_USER15 | /* FL 13..19 */ \
+ FL_USER16 | FL_USER17 | FL_USER18 | FL_USER19)
+#define RSET_LEV_SHIFT (FL_USHIFT + 13)
+#define RSET_LEV_MAX 127 /* 7 bits */
+
+#define SET_ASSERT(expr) RUBY_ASSERT_MESG_WHEN(SET_DEBUG, expr, #expr)
+
+#define RSET_SIZE(set) set_table_size(RSET_TABLE(set))
+#define RSET_EMPTY(set) (RSET_SIZE(set) == 0)
+#define RSET_SIZE_NUM(set) SIZET2NUM(RSET_SIZE(set))
+#define RSET_IS_MEMBER(sobj, item) set_lookup(RSET_TABLE(set), (st_data_t)(item))
+#define RSET_COMPARE_BY_IDENTITY(set) (RSET_TABLE(set)->type == &identhash)
+
+struct set_object {
+ set_table table;
+};
+
+static int
+mark_key(st_data_t key, st_data_t data)
+{
+ rb_gc_mark_movable((VALUE)key);
+
+ return ST_CONTINUE;
+}
+
+static void
+set_mark(void *ptr)
+{
+ struct set_object *sobj = ptr;
+ if (sobj->table.entries) set_foreach(&sobj->table, mark_key, 0);
+}
+
+static void
+set_free_embedded(struct set_object *sobj)
+{
+ free((&sobj->table)->bins);
+ free((&sobj->table)->entries);
+}
+
+static void
+set_free(void *ptr)
+{
+ struct set_object *sobj = ptr;
+ set_free_embedded(sobj);
+ memset(&sobj->table, 0, sizeof(sobj->table));
+}
+
+static size_t
+set_size(const void *ptr)
+{
+ const struct set_object *sobj = ptr;
+ /* Do not count the table size twice, as it is embedded */
+ return (unsigned long)set_memsize(&sobj->table) - sizeof(sobj->table);
+}
+
+static int
+set_foreach_replace(st_data_t key, st_data_t argp, int error)
+{
+ if (rb_gc_location((VALUE)key) != (VALUE)key) {
+ return ST_REPLACE;
+ }
+
+ return ST_CONTINUE;
+}
+
+static int
+set_replace_ref(st_data_t *key, st_data_t argp, int existing)
+{
+ if (rb_gc_location((VALUE)*key) != (VALUE)*key) {
+ *key = rb_gc_location((VALUE)*key);
+ }
+
+ return ST_CONTINUE;
+}
+
+static void
+set_compact(void *ptr)
+{
+ struct set_object *sobj = ptr;
+ set_compact_table(&sobj->table);
+ set_foreach_with_replace(&sobj->table, set_foreach_replace, set_replace_ref, 0);
+}
+
+static const rb_data_type_t set_data_type = {
+ .wrap_struct_name = "set",
+ .function = {
+ .dmark = set_mark,
+ .dfree = set_free,
+ .dsize = set_size,
+ .dcompact = set_compact,
+ },
+ .flags = RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE
+};
+
+static inline set_table *
+RSET_TABLE(VALUE set)
+{
+ struct set_object *sobj;
+ TypedData_Get_Struct(set, struct set_object, &set_data_type, sobj);
+ return &sobj->table;
+}
+
+static unsigned long
+iter_lev_in_ivar(VALUE set)
+{
+ VALUE levval = rb_ivar_get(set, id_set_iter_lev);
+ SET_ASSERT(FIXNUM_P(levval));
+ long lev = FIX2LONG(levval);
+ SET_ASSERT(lev >= 0);
+ return (unsigned long)lev;
+}
+
+void rb_ivar_set_internal(VALUE obj, ID id, VALUE val);
+
+static void
+iter_lev_in_ivar_set(VALUE set, unsigned long lev)
+{
+ SET_ASSERT(lev >= RSET_LEV_MAX);
+ SET_ASSERT(POSFIXABLE(lev)); /* POSFIXABLE means fitting to long */
+ rb_ivar_set_internal(set, id_set_iter_lev, LONG2FIX((long)lev));
+}
+
+static inline unsigned long
+iter_lev_in_flags(VALUE set)
+{
+ return (unsigned long)((RBASIC(set)->flags >> RSET_LEV_SHIFT) & RSET_LEV_MAX);
+}
+
+static inline void
+iter_lev_in_flags_set(VALUE set, unsigned long lev)
+{
+ SET_ASSERT(lev <= RSET_LEV_MAX);
+ RBASIC(set)->flags = ((RBASIC(set)->flags & ~RSET_LEV_MASK) | ((VALUE)lev << RSET_LEV_SHIFT));
+}
+
+static inline bool
+set_iterating_p(VALUE set)
+{
+ return iter_lev_in_flags(set) > 0;
+}
+
+static void
+set_iter_lev_inc(VALUE set)
+{
+ unsigned long lev = iter_lev_in_flags(set);
+ if (lev == RSET_LEV_MAX) {
+ lev = iter_lev_in_ivar(set) + 1;
+ if (!POSFIXABLE(lev)) { /* paranoiac check */
+ rb_raise(rb_eRuntimeError, "too much nested iterations");
+ }
+ }
+ else {
+ lev += 1;
+ iter_lev_in_flags_set(set, lev);
+ if (lev < RSET_LEV_MAX) return;
+ }
+ iter_lev_in_ivar_set(set, lev);
+}
+
+static void
+set_iter_lev_dec(VALUE set)
+{
+ unsigned long lev = iter_lev_in_flags(set);
+ if (lev == RSET_LEV_MAX) {
+ lev = iter_lev_in_ivar(set);
+ if (lev > RSET_LEV_MAX) {
+ iter_lev_in_ivar_set(set, lev-1);
+ return;
+ }
+ rb_attr_delete(set, id_set_iter_lev);
+ }
+ else if (lev == 0) {
+ rb_raise(rb_eRuntimeError, "iteration level underflow");
+ }
+ iter_lev_in_flags_set(set, lev - 1);
+}
+
+static VALUE
+set_foreach_ensure(VALUE set)
+{
+ set_iter_lev_dec(set);
+ return 0;
+}
+
+typedef int set_foreach_func(VALUE, VALUE);
+
+struct set_foreach_arg {
+ VALUE set;
+ set_foreach_func *func;
+ VALUE arg;
+};
+
+static int
+set_iter_status_check(int status)
+{
+ if (status == ST_CONTINUE) {
+ return ST_CHECK;
+ }
+
+ return status;
+}
+
+static int
+set_foreach_iter(st_data_t key, st_data_t argp, int error)
+{
+ struct set_foreach_arg *arg = (struct set_foreach_arg *)argp;
+
+ if (error) return ST_STOP;
+
+ set_table *tbl = RSET_TABLE(arg->set);
+ int status = (*arg->func)((VALUE)key, arg->arg);
+
+ if (RSET_TABLE(arg->set) != tbl) {
+ rb_raise(rb_eRuntimeError, "reset occurred during iteration");
+ }
+
+ return set_iter_status_check(status);
+}
+
+static VALUE
+set_foreach_call(VALUE arg)
+{
+ VALUE set = ((struct set_foreach_arg *)arg)->set;
+ int ret = 0;
+ ret = set_foreach_check(RSET_TABLE(set), set_foreach_iter,
+ (st_data_t)arg, (st_data_t)Qundef);
+ if (ret) {
+ rb_raise(rb_eRuntimeError, "ret: %d, set modified during iteration", ret);
+ }
+ return Qnil;
+}
+
+static void
+set_iter(VALUE set, set_foreach_func *func, VALUE farg)
+{
+ struct set_foreach_arg arg;
+
+ if (RSET_EMPTY(set))
+ return;
+ arg.set = set;
+ arg.func = func;
+ arg.arg = farg;
+ if (RB_OBJ_FROZEN(set)) {
+ set_foreach_call((VALUE)&arg);
+ }
+ else {
+ set_iter_lev_inc(set);
+ rb_ensure(set_foreach_call, (VALUE)&arg, set_foreach_ensure, set);
+ }
+}
+
+NORETURN(static void no_new_item(void));
+static void
+no_new_item(void)
+{
+ rb_raise(rb_eRuntimeError, "can't add a new item into set during iteration");
+}
+
+static void
+set_compact_after_delete(VALUE set)
+{
+ if (!set_iterating_p(set)) {
+ set_compact_table(RSET_TABLE(set));
+ }
+}
+
+static int
+set_table_insert_wb(set_table *tab, VALUE set, VALUE key, VALUE *key_addr)
+{
+ if (tab->type != &identhash && rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) {
+ key = rb_hash_key_str(key);
+ if (key_addr) *key_addr = key;
+ }
+ int ret = set_insert(tab, (st_data_t)key);
+ if (ret == 0) RB_OBJ_WRITTEN(set, Qundef, key);
+ return ret;
+}
+
+static int
+set_insert_wb(VALUE set, VALUE key, VALUE *key_addr)
+{
+ return set_table_insert_wb(RSET_TABLE(set), set, key, key_addr);
+}
+
+static VALUE
+set_alloc_with_size(VALUE klass, st_index_t size)
+{
+ VALUE set;
+ struct set_object *sobj;
+
+ set = TypedData_Make_Struct(klass, struct set_object, &set_data_type, sobj);
+ set_init_table_with_size(&sobj->table, &objhash, size);
+
+ return set;
+}
+
+
+static VALUE
+set_s_alloc(VALUE klass)
+{
+ return set_alloc_with_size(klass, 0);
+}
+
+static VALUE
+set_s_create(int argc, VALUE *argv, VALUE klass)
+{
+ VALUE set = set_alloc_with_size(klass, argc);
+ set_table *table = RSET_TABLE(set);
+ int i;
+
+ for (i=0; i < argc; i++) {
+ set_table_insert_wb(table, set, argv[i], NULL);
+ }
+
+ return set;
+}
+
+static void
+check_set(VALUE arg)
+{
+ if (!rb_obj_is_kind_of(arg, rb_cSet)) {
+ rb_raise(rb_eArgError, "value must be a set");
+ }
+}
+
+static ID
+enum_method_id(VALUE other)
+{
+ if (rb_respond_to(other, id_each_entry)) {
+ return id_each_entry;
+ }
+ else if (rb_respond_to(other, id_each)) {
+ return id_each;
+ }
+ else {
+ rb_raise(rb_eArgError, "value must be enumerable");
+ }
+}
+
+static VALUE
+set_enum_size(VALUE set, VALUE args, VALUE eobj)
+{
+ return RSET_SIZE_NUM(set);
+}
+
+static VALUE
+set_initialize_without_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, set))
+{
+ VALUE element = i;
+ set_insert_wb(set, element, &element);
+ return element;
+}
+
+static VALUE
+set_initialize_with_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, set))
+{
+ VALUE element = rb_yield(i);
+ set_insert_wb(set, element, &element);
+ return element;
+}
+
+/*
+ * call-seq:
+ * Set.new -> new_set
+ * Set.new(enum) -> new_set
+ * Set.new(enum) { |elem| ... } -> new_set
+ *
+ * Creates a new set containing the elements of the given enumerable
+ * object.
+ *
+ * If a block is given, the elements of enum are preprocessed by the
+ * given block.
+ *
+ * Set.new([1, 2]) #=> #<Set: {1, 2}>
+ * Set.new([1, 2, 1]) #=> #<Set: {1, 2}>
+ * Set.new([1, 'c', :s]) #=> #<Set: {1, "c", :s}>
+ * Set.new(1..5) #=> #<Set: {1, 2, 3, 4, 5}>
+ * Set.new([1, 2, 3]) { |x| x * x } #=> #<Set: {1, 4, 9}>
+ */
+static VALUE
+set_i_initialize(int argc, VALUE *argv, VALUE set)
+{
+ if (RBASIC(set)->flags & RSET_INITIALIZED) {
+ rb_raise(rb_eRuntimeError, "cannot reinitialize set");
+ }
+ RBASIC(set)->flags |= RSET_INITIALIZED;
+
+ VALUE other;
+ rb_check_arity(argc, 0, 1);
+
+ if (argc > 0 && (other = argv[0]) != Qnil) {
+ if (RB_TYPE_P(other, T_ARRAY)) {
+ long len = RARRAY_LEN(other);
+ if (RARRAY_LEN(other) != 0) {
+ set_table *into = RSET_TABLE(set);
+ VALUE key;
+ int block_given = rb_block_given_p();
+ RARRAY_PTR_USE(other, ptr, {
+ for(; len > 0; len--, ptr++) {
+ key = *ptr;
+ if (block_given) key = rb_yield(key);
+ set_table_insert_wb(into, set, key, NULL);
+ }
+ });
+ }
+ }
+ else {
+ rb_block_call(other, enum_method_id(other), 0, 0,
+ rb_block_given_p() ? set_initialize_with_block : set_initialize_without_block,
+ set);
+ }
+ }
+
+ return set;
+}
+
+static VALUE
+set_i_initialize_copy(VALUE set, VALUE other)
+{
+ if (set == other) return set;
+
+ if (set_iterating_p(set)) {
+ rb_raise(rb_eRuntimeError, "cannot replace set during iteration");
+ }
+
+ struct set_object *sobj;
+ TypedData_Get_Struct(set, struct set_object, &set_data_type, sobj);
+
+ set_free_embedded(sobj);
+ set_copy(&sobj->table, RSET_TABLE(other));
+
+ return set;
+}
+
+static int
+set_inspect_i(st_data_t key, st_data_t arg)
+{
+ VALUE str = (VALUE)arg;
+ if (RSTRING_LEN(str) > 8) {
+ rb_str_buf_cat_ascii(str, ", ");
+ }
+ rb_str_buf_append(str, rb_inspect((VALUE)key));
+
+ return ST_CONTINUE;
+}
+
+static VALUE
+set_inspect(VALUE set, VALUE dummy, int recur)
+{
+ VALUE str;
+
+ if (recur) return rb_usascii_str_new2("#<Set: {...}>");
+ str = rb_str_buf_new2("#<Set: {");
+ set_iter(set, set_inspect_i, str);
+ rb_str_buf_cat2(str, "}>");
+
+ return str;
+}
+
+/*
+ * call-seq:
+ * inspect -> new_string
+ *
+ * Returns a new string containing the set entries:
+ *
+ * s = Set.new
+ * s.inspect # => "#<Set: {}>"
+ * s.add(1)
+ * s.inspect # => "#<Set: {1}>"
+ * s.add(2)
+ * s.inspect # => "#<Set: {1, 2}>"
+ *
+ * Related: see {Methods for Converting}[rdoc-ref:Set@Methods+for+Converting].
+ */
+static VALUE
+set_i_inspect(VALUE set)
+{
+ return rb_exec_recursive(set_inspect, set, 0);
+}
+
+static int
+set_to_a_i(st_data_t key, st_data_t arg)
+{
+ rb_ary_push((VALUE)arg, (VALUE)key);
+ return ST_CONTINUE;
+}
+
+/*
+ * call-seq:
+ * to_a -> array
+ *
+ * Returns an array containing all elements in the set.
+ *
+ * Set[1, 2].to_a #=> [1, 2]
+ * Set[1, 'c', :s].to_a #=> [1, "c", :s]
+ */
+static VALUE
+set_i_to_a(VALUE set)
+{
+ st_index_t size = RSET_SIZE(set);
+ VALUE ary = rb_ary_new_capa(size);
+
+ if (size == 0) return ary;
+
+ if (ST_DATA_COMPATIBLE_P(VALUE)) {
+ RARRAY_PTR_USE(ary, ptr, {
+ size = set_keys(RSET_TABLE(set), ptr, size);
+ });
+ rb_gc_writebarrier_remember(ary);
+ rb_ary_set_len(ary, size);
+ }
+ else {
+ set_iter(set, set_to_a_i, (st_data_t)ary);
+ }
+ return ary;
+}
+
+/*
+ * call-seq:
+ * to_set(klass = Set, *args, &block) -> self or new_set
+ *
+ * Returns self if receiver is an instance of +Set+ and no arguments or
+ * block are given. Otherwise, converts the set to another with
+ * <tt>klass.new(self, *args, &block)</tt>.
+ *
+ * In subclasses, returns `klass.new(self, *args, &block)` unless overridden.
+ */
+static VALUE
+set_i_to_set(int argc, VALUE *argv, VALUE set)
+{
+ VALUE klass;
+
+ if (argc == 0) {
+ klass = rb_cSet;
+ argv = &set;
+ argc = 1;
+ }
+ else {
+ klass = argv[0];
+ argv[0] = set;
+ }
+
+ if (klass == rb_cSet && rb_obj_is_instance_of(set, rb_cSet) &&
+ argc == 1 && !rb_block_given_p()) {
+ return set;
+ }
+
+ return rb_funcall_passing_block(klass, id_new, argc, argv);
+}
+
+/*
+ * call-seq:
+ * join(separator=nil)-> new_string
+ *
+ * Returns a string created by converting each element of the set to a string.
+ */
+static VALUE
+set_i_join(int argc, VALUE *argv, VALUE set)
+{
+ rb_check_arity(argc, 0, 1);
+ return rb_ary_join(set_i_to_a(set), argc == 0 ? Qnil : argv[0]);
+}
+
+/*
+ * call-seq:
+ * add(obj) -> self
+ *
+ * Adds the given object to the set and returns self. Use `merge` to
+ * add many elements at once.
+ *
+ * Set[1, 2].add(3) #=> #<Set: {1, 2, 3}>
+ * Set[1, 2].add([3, 4]) #=> #<Set: {1, 2, [3, 4]}>
+ * Set[1, 2].add(2) #=> #<Set: {1, 2}>
+ */
+static VALUE
+set_i_add(VALUE set, VALUE item)
+{
+ rb_check_frozen(set);
+ if (set_iterating_p(set)) {
+ if (!set_lookup(RSET_TABLE(set), (st_data_t)item)) {
+ no_new_item();
+ }
+ }
+ else {
+ set_insert_wb(set, item, NULL);
+ }
+ return set;
+}
+
+/*
+ * call-seq:
+ * add?(obj) -> self or nil
+ *
+ * Adds the given object to the set and returns self. If the object is
+ * already in the set, returns nil.
+ *
+ * Set[1, 2].add?(3) #=> #<Set: {1, 2, 3}>
+ * Set[1, 2].add?([3, 4]) #=> #<Set: {1, 2, [3, 4]}>
+ * Set[1, 2].add?(2) #=> nil
+ */
+static VALUE
+set_i_add_p(VALUE set, VALUE item)
+{
+ rb_check_frozen(set);
+ if (set_iterating_p(set)) {
+ if (!set_lookup(RSET_TABLE(set), (st_data_t)item)) {
+ no_new_item();
+ }
+ return Qnil;
+ }
+ else {
+ return set_insert_wb(set, item, NULL) ? Qnil : set;
+ }
+}
+
+/*
+ * call-seq:
+ * delete(obj) -> self
+ *
+ * Deletes the given object from the set and returns self. Use subtract
+ * to delete many items at once.
+ */
+static VALUE
+set_i_delete(VALUE set, VALUE item)
+{
+ rb_check_frozen(set);
+ if (set_delete(RSET_TABLE(set), (st_data_t *)&item)) {
+ set_compact_after_delete(set);
+ }
+ return set;
+}
+
+/*
+ * call-seq:
+ * delete?(obj) -> self or nil
+ *
+ * Deletes the given object from the set and returns self. If the
+ * object is not in the set, returns nil.
+ */
+static VALUE
+set_i_delete_p(VALUE set, VALUE item)
+{
+ rb_check_frozen(set);
+ if (set_delete(RSET_TABLE(set), (st_data_t *)&item)) {
+ set_compact_after_delete(set);
+ return set;
+ }
+ return Qnil;
+}
+
+static int
+set_delete_if_i(st_data_t key, st_data_t dummy)
+{
+ return RTEST(rb_yield((VALUE)key)) ? ST_DELETE : ST_CONTINUE;
+}
+
+/*
+ * call-seq:
+ * delete_if { |o| ... } -> self
+ * delete_if -> enumerator
+ *
+ * Deletes every element of the set for which block evaluates to
+ * true, and returns self. Returns an enumerator if no block is given.
+ */
+static VALUE
+set_i_delete_if(VALUE set)
+{
+ RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
+ rb_check_frozen(set);
+ set_iter(set, set_delete_if_i, 0);
+ set_compact_after_delete(set);
+ return set;
+}
+
+/*
+ * call-seq:
+ * reject! { |o| ... } -> self
+ * reject! -> enumerator
+ *
+ * Equivalent to Set#delete_if, but returns nil if no changes were made.
+ * Returns an enumerator if no block is given.
+ */
+static VALUE
+set_i_reject(VALUE set)
+{
+ RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
+ rb_check_frozen(set);
+
+ set_table *table = RSET_TABLE(set);
+ size_t n = set_table_size(table);
+ set_iter(set, set_delete_if_i, 0);
+
+ if (n == set_table_size(table)) return Qnil;
+
+ set_compact_after_delete(set);
+ return set;
+}
+
+static int
+set_classify_i(st_data_t key, st_data_t tmp)
+{
+ VALUE* args = (VALUE*)tmp;
+ VALUE hash = args[0];
+ VALUE hash_key = rb_yield(key);
+ VALUE set = rb_hash_lookup2(hash, hash_key, Qundef);
+ if (set == Qundef) {
+ set = set_s_alloc(args[1]);
+ rb_hash_aset(hash, hash_key, set);
+ }
+ set_i_add(set, key);
+
+ return ST_CONTINUE;
+}
+
+/*
+ * call-seq:
+ * classify { |o| ... } -> hash
+ * classify -> enumerator
+ *
+ * Classifies the set by the return value of the given block and
+ * returns a hash of {value => set of elements} pairs. The block is
+ * called once for each element of the set, passing the element as
+ * parameter.
+ *
+ * files = Set.new(Dir.glob("*.rb"))
+ * hash = files.classify { |f| File.mtime(f).year }
+ * hash #=> {2000 => #<Set: {"a.rb", "b.rb"}>,
+ * # 2001 => #<Set: {"c.rb", "d.rb", "e.rb"}>,
+ * # 2002 => #<Set: {"f.rb"}>}
+ *
+ * Returns an enumerator if no block is given.
+ */
+static VALUE
+set_i_classify(VALUE set)
+{
+ RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
+ VALUE args[2];
+ args[0] = rb_hash_new();
+ args[1] = rb_obj_class(set);
+ set_iter(set, set_classify_i, (st_data_t)args);
+ return args[0];
+}
+
+struct set_divide_args {
+ VALUE self;
+ VALUE set_class;
+ VALUE final_set;
+ VALUE hash;
+ VALUE current_set;
+ VALUE current_item;
+ unsigned long ni;
+ unsigned long nj;
+};
+
+static VALUE
+set_divide_block0(RB_BLOCK_CALL_FUNC_ARGLIST(j, arg))
+{
+ struct set_divide_args *args = (struct set_divide_args *)arg;
+ if (args->nj > args->ni) {
+ VALUE i = args->current_item;
+ if (RTEST(rb_yield_values(2, i, j)) && RTEST(rb_yield_values(2, j, i))) {
+ VALUE hash = args->hash;
+ if (args->current_set == Qnil) {
+ VALUE set = rb_hash_aref(hash, j);
+ if (set == Qnil) {
+ VALUE both[2] = {i, j};
+ set = set_s_create(2, both, args->set_class);
+ rb_hash_aset(hash, i, set);
+ rb_hash_aset(hash, j, set);
+ set_i_add(args->final_set, set);
+ }
+ else {
+ set_i_add(set, i);
+ rb_hash_aset(hash, i, set);
+ }
+ args->current_set = set;
+ }
+ else {
+ set_i_add(args->current_set, j);
+ rb_hash_aset(hash, j, args->current_set);
+ }
+ }
+ }
+ args->nj++;
+ return j;
+}
+
+static VALUE
+set_divide_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, arg))
+{
+ struct set_divide_args *args = (struct set_divide_args *)arg;
+ VALUE hash = args->hash;
+ args->current_set = rb_hash_aref(hash, i);
+ args->current_item = i;
+ args->nj = 0;
+ rb_block_call(args->self, id_each, 0, 0, set_divide_block0, arg);
+ if (args->current_set == Qnil) {
+ VALUE set = set_s_create(1, &i, args->set_class);
+ rb_hash_aset(hash, i, set);
+ set_i_add(args->final_set, set);
+ }
+ args->ni++;
+ return i;
+}
+
+static void set_merge_enum_into(VALUE set, VALUE arg);
+
+/*
+ * call-seq:
+ * divide { |o1, o2| ... } -> set
+ * divide { |o| ... } -> set
+ * divide -> enumerator
+ *
+ * Divides the set into a set of subsets according to the commonality
+ * defined by the given block.
+ *
+ * If the arity of the block is 2, elements o1 and o2 are in common
+ * if both block.call(o1, o2) and block.call(o2, o1) are true.
+ * Otherwise, elements o1 and o2 are in common if
+ * block.call(o1) == block.call(o2).
+ *
+ * numbers = Set[1, 3, 4, 6, 9, 10, 11]
+ * set = numbers.divide { |i,j| (i - j).abs == 1 }
+ * set #=> #<Set: {#<Set: {1}>,
+ * # #<Set: {3, 4}>,
+ * # #<Set: {6}>}>
+ * # #<Set: {9, 10, 11}>,
+ *
+ * Returns an enumerator if no block is given.
+ */
+static VALUE
+set_i_divide(VALUE set)
+{
+ RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
+
+ if (rb_block_arity() == 2) {
+ VALUE final_set = set_s_create(0, 0, rb_cSet);
+ struct set_divide_args args = {
+ .self = set,
+ .set_class = rb_obj_class(set),
+ .final_set = final_set,
+ .hash = rb_hash_new(),
+ .current_set = 0,
+ .current_item = 0,
+ .ni = 0,
+ .nj = 0
+ };
+ rb_block_call(set, id_each, 0, 0, set_divide_block, (VALUE)&args);
+ return final_set;
+ }
+
+ VALUE values = rb_hash_values(set_i_classify(set));
+ set = set_alloc_with_size(rb_cSet, RARRAY_LEN(values));
+ set_merge_enum_into(set, values);
+ return set;
+}
+
+static int
+set_clear_i(st_data_t key, st_data_t dummy)
+{
+ return ST_DELETE;
+}
+
+/*
+ * call-seq:
+ * clear -> self
+ *
+ * Removes all elements and returns self.
+ *
+ * set = Set[1, 'c', :s] #=> #<Set: {1, "c", :s}>
+ * set.clear #=> #<Set: {}>
+ * set #=> #<Set: {}>
+ */
+static VALUE
+set_i_clear(VALUE set)
+{
+ rb_check_frozen(set);
+ if (RSET_SIZE(set) == 0) return set;
+ if (set_iterating_p(set)) {
+ set_iter(set, set_clear_i, 0);
+ }
+ else {
+ set_clear(RSET_TABLE(set));
+ set_compact_after_delete(set);
+ }
+ return set;
+}
+
+struct set_intersection_data {
+ VALUE set;
+ set_table *into;
+ set_table *other;
+};
+
+static int
+set_intersection_i(st_data_t key, st_data_t tmp)
+{
+ struct set_intersection_data *data = (struct set_intersection_data *)tmp;
+ if (set_lookup(data->other, key)) {
+ set_table_insert_wb(data->into, data->set, key, NULL);
+ }
+
+ return ST_CONTINUE;
+}
+
+static VALUE
+set_intersection_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, data))
+{
+ set_intersection_i((st_data_t)i, (st_data_t)data);
+ return i;
+}
+
+/*
+ * call-seq:
+ * set & enum -> new_set
+ *
+ * Returns a new set containing elements common to the set and the given
+ * enumerable object.
+ *
+ * Set[1, 3, 5] & Set[3, 2, 1] #=> #<Set: {3, 1}>
+ * Set['a', 'b', 'z'] & ['a', 'b', 'c'] #=> #<Set: {"a", "b"}>
+ */
+static VALUE
+set_i_intersection(VALUE set, VALUE other)
+{
+ VALUE new_set = set_s_alloc(rb_obj_class(set));
+ set_table *stable = RSET_TABLE(set);
+ set_table *ntable = RSET_TABLE(new_set);
+
+ if (rb_obj_is_kind_of(other, rb_cSet)) {
+ set_table *otable = RSET_TABLE(other);
+ if (set_table_size(stable) >= set_table_size(otable)) {
+ /* Swap so we iterate over the smaller set */
+ otable = stable;
+ set = other;
+ }
+
+ struct set_intersection_data data = {
+ .set = new_set,
+ .into = ntable,
+ .other = otable
+ };
+ set_iter(set, set_intersection_i, (st_data_t)&data);
+ }
+ else {
+ struct set_intersection_data data = {
+ .set = new_set,
+ .into = ntable,
+ .other = stable
+ };
+ rb_block_call(other, enum_method_id(other), 0, 0, set_intersection_block, (VALUE)&data);
+ }
+
+ return new_set;
+}
+
+/*
+ * call-seq:
+ * include?(item) -> true or false
+ *
+ * Returns true if the set contains the given object:
+ *
+ * Set[1, 2, 3].include? 2 #=> true
+ * Set[1, 2, 3].include? 4 #=> false
+ *
+ * Note that <code>include?</code> and <code>member?</code> do not test member
+ * equality using <code>==</code> as do other Enumerables.
+ *
+ * This is aliased to #===, so it is usable in +case+ expressions:
+ *
+ * case :apple
+ * when Set[:potato, :carrot]
+ * "vegetable"
+ * when Set[:apple, :banana]
+ * "fruit"
+ * end
+ * # => "fruit"
+ *
+ * See also Enumerable#include?
+ */
+static VALUE
+set_i_include(VALUE set, VALUE item)
+{
+ return RBOOL(RSET_IS_MEMBER(set, item));
+}
+
+struct set_merge_args {
+ VALUE set;
+ set_table *into;
+};
+
+static int
+set_merge_i(st_data_t key, st_data_t data)
+{
+ struct set_merge_args *args = (struct set_merge_args *)data;
+ set_table_insert_wb(args->into, args->set, key, NULL);
+ return ST_CONTINUE;
+}
+
+static VALUE
+set_merge_block(RB_BLOCK_CALL_FUNC_ARGLIST(key, set))
+{
+ VALUE element = key;
+ set_insert_wb(set, element, &element);
+ return element;
+}
+
+static void
+set_merge_enum_into(VALUE set, VALUE arg)
+{
+ if (rb_obj_is_kind_of(arg, rb_cSet)) {
+ struct set_merge_args args = {
+ .set = set,
+ .into = RSET_TABLE(set)
+ };
+ set_iter(arg, set_merge_i, (st_data_t)&args);
+ }
+ else if (RB_TYPE_P(arg, T_ARRAY)) {
+ long len = RARRAY_LEN(arg);
+ if (RARRAY_LEN(arg) != 0) {
+ set_table *into = RSET_TABLE(set);
+ RARRAY_PTR_USE(arg, ptr, {
+ for(; len > 0; len--, ptr++) {
+ set_table_insert_wb(into, set, *ptr, NULL);
+ }
+ });
+ }
+ }
+ else {
+ rb_block_call(arg, enum_method_id(arg), 0, 0, set_merge_block, (VALUE)set);
+ }
+}
+
+/*
+ * call-seq:
+ * merge(*enums, **nil) -> self
+ *
+ * Merges the elements of the given enumerable objects to the set and
+ * returns self.
+ */
+static VALUE
+set_i_merge(int argc, VALUE *argv, VALUE set)
+{
+ if (rb_keyword_given_p()) {
+ rb_raise(rb_eArgError, "no keywords accepted");
+ }
+ rb_check_frozen(set);
+
+ int i;
+
+ for (i=0; i < argc; i++) {
+ set_merge_enum_into(set, argv[i]);
+ }
+
+ return set;
+}
+
+static VALUE
+set_reset_table_with_type(VALUE set, const struct st_hash_type *type)
+{
+ rb_check_frozen(set);
+
+ struct set_object *sobj;
+ TypedData_Get_Struct(set, struct set_object, &set_data_type, sobj);
+ set_table *old = &sobj->table;
+
+ size_t size = set_table_size(old);
+ if (size > 0) {
+ set_table *new = set_init_table_with_size(NULL, type, size);
+ struct set_merge_args args = {
+ .set = set,
+ .into = new
+ };
+ set_iter(set, set_merge_i, (st_data_t)&args);
+ set_free_embedded(sobj);
+ memcpy(&sobj->table, new, sizeof(*new));
+ free(new);
+ }
+ else {
+ sobj->table.type = type;
+ }
+
+ return set;
+}
+
+/*
+ * call-seq:
+ * compare_by_identity -> self
+ *
+ * Makes the set compare its elements by their identity and returns self.
+ */
+static VALUE
+set_i_compare_by_identity(VALUE set)
+{
+ if (RSET_COMPARE_BY_IDENTITY(set)) return set;
+
+ if (set_iterating_p(set)) {
+ rb_raise(rb_eRuntimeError, "compare_by_identity during iteration");
+ }
+
+ return set_reset_table_with_type(set, &identhash);
+}
+
+/*
+ * call-seq:
+ * compare_by_identity? -> true or false
+ *
+ * Returns true if the set will compare its elements by their
+ * identity. Also see Set#compare_by_identity.
+ */
+static VALUE
+set_i_compare_by_identity_p(VALUE set)
+{
+ return RBOOL(RSET_COMPARE_BY_IDENTITY(set));
+}
+
+/*
+ * call-seq:
+ * size -> integer
+ *
+ * Returns the number of elements.
+ */
+static VALUE
+set_i_size(VALUE set)
+{
+ return RSET_SIZE_NUM(set);
+}
+
+/*
+ * call-seq:
+ * empty? -> true or false
+ *
+ * Returns true if the set contains no elements.
+ */
+static VALUE
+set_i_empty(VALUE set)
+{
+ return RBOOL(RSET_EMPTY(set));
+}
+
+static int
+set_xor_i(st_data_t key, st_data_t data)
+{
+ VALUE element = (VALUE)key;
+ VALUE set = (VALUE)data;
+ set_table *table = RSET_TABLE(set);
+ if (set_table_insert_wb(table, set, element, &element)) {
+ set_delete(table, &element);
+ }
+ return ST_CONTINUE;
+}
+
+/*
+ * call-seq:
+ * set ^ enum -> new_set
+ *
+ * Returns a new set containing elements exclusive between the set and the
+ * given enumerable object. <tt>(set ^ enum)</tt> is equivalent to
+ * <tt>((set | enum) - (set & enum))</tt>.
+ *
+ * Set[1, 2] ^ Set[2, 3] #=> #<Set: {3, 1}>
+ * Set[1, 'b', 'c'] ^ ['b', 'd'] #=> #<Set: {"d", 1, "c"}>
+ */
+static VALUE
+set_i_xor(VALUE set, VALUE other)
+{
+ VALUE new_set;
+ if (rb_obj_is_kind_of(other, rb_cSet)) {
+ new_set = other;
+ }
+ else {
+ new_set = set_s_alloc(rb_obj_class(set));
+ set_merge_enum_into(new_set, other);
+ }
+ set_iter(set, set_xor_i, (st_data_t)new_set);
+ return new_set;
+}
+
+/*
+ * call-seq:
+ * set | enum -> new_set
+ *
+ * Returns a new set built by merging the set and the elements of the
+ * given enumerable object.
+ *
+ * Set[1, 2, 3] | Set[2, 4, 5] #=> #<Set: {1, 2, 3, 4, 5}>
+ * Set[1, 5, 'z'] | (1..6) #=> #<Set: {1, 5, "z", 2, 3, 4, 6}>
+ */
+static VALUE
+set_i_union(VALUE set, VALUE other)
+{
+ set = rb_obj_dup(set);
+ set_merge_enum_into(set, other);
+ return set;
+}
+
+static int
+set_remove_i(st_data_t key, st_data_t from)
+{
+ set_delete((struct set_table *)from, (st_data_t *)&key);
+ return ST_CONTINUE;
+}
+
+static VALUE
+set_remove_block(RB_BLOCK_CALL_FUNC_ARGLIST(key, set))
+{
+ rb_check_frozen(set);
+ set_delete(RSET_TABLE(set), (st_data_t *)&key);
+ return key;
+}
+
+static void
+set_remove_enum_from(VALUE set, VALUE arg)
+{
+ if (rb_obj_is_kind_of(arg, rb_cSet)) {
+ set_iter(arg, set_remove_i, (st_data_t)RSET_TABLE(set));
+ }
+ else {
+ rb_block_call(arg, enum_method_id(arg), 0, 0, set_remove_block, (VALUE)set);
+ }
+}
+
+/*
+ * call-seq:
+ * subtract(enum) -> self
+ *
+ * Deletes every element that appears in the given enumerable object
+ * and returns self.
+ */
+static VALUE
+set_i_subtract(VALUE set, VALUE other)
+{
+ rb_check_frozen(set);
+ set_remove_enum_from(set, other);
+ return set;
+}
+
+/*
+ * call-seq:
+ * set - enum -> new_set
+ *
+ * Returns a new set built by duplicating the set, removing every
+ * element that appears in the given enumerable object.
+ *
+ * Set[1, 3, 5] - Set[1, 5] #=> #<Set: {3}>
+ * Set['a', 'b', 'z'] - ['a', 'c'] #=> #<Set: {"b", "z"}>
+ */
+static VALUE
+set_i_difference(VALUE set, VALUE other)
+{
+ return set_i_subtract(rb_obj_dup(set), other);
+}
+
+static int
+set_each_i(st_data_t key, st_data_t dummy)
+{
+ rb_yield(key);
+ return ST_CONTINUE;
+}
+
+/*
+ * call-seq:
+ * each { |o| ... } -> self
+ * each -> enumerator
+ *
+ * Calls the given block once for each element in the set, passing
+ * the element as parameter. Returns an enumerator if no block is
+ * given.
+ */
+static VALUE
+set_i_each(VALUE set)
+{
+ RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
+ set_iter(set, set_each_i, 0);
+ return set;
+}
+
+static int
+set_collect_i(st_data_t key, st_data_t data)
+{
+ set_insert_wb((VALUE)data, rb_yield((VALUE)key), NULL);
+ return ST_CONTINUE;
+}
+
+/*
+ * call-seq:
+ * collect! { |o| ... } -> self
+ * collect! -> enumerator
+ *
+ * Replaces the elements with ones returned by +collect+.
+ * Returns an enumerator if no block is given.
+ */
+static VALUE
+set_i_collect(VALUE set)
+{
+ RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
+ rb_check_frozen(set);
+
+ VALUE new_set = set_s_alloc(rb_obj_class(set));
+ set_iter(set, set_collect_i, (st_data_t)new_set);
+ set_i_initialize_copy(set, new_set);
+
+ return set;
+}
+
+static int
+set_keep_if_i(st_data_t key, st_data_t into)
+{
+ if (!RTEST(rb_yield((VALUE)key))) {
+ set_delete((set_table *)into, &key);
+ }
+ return ST_CONTINUE;
+}
+
+/*
+ * call-seq:
+ * keep_if { |o| ... } -> self
+ * keep_if -> enumerator
+ *
+ * Deletes every element of the set for which block evaluates to false, and
+ * returns self. Returns an enumerator if no block is given.
+ */
+static VALUE
+set_i_keep_if(VALUE set)
+{
+ RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
+ rb_check_frozen(set);
+
+ set_iter(set, set_keep_if_i, (st_data_t)RSET_TABLE(set));
+
+ return set;
+}
+
+/*
+ * call-seq:
+ * select! { |o| ... } -> self
+ * select! -> enumerator
+ *
+ * Equivalent to Set#keep_if, but returns nil if no changes were made.
+ * Returns an enumerator if no block is given.
+ */
+static VALUE
+set_i_select(VALUE set)
+{
+ RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size);
+ rb_check_frozen(set);
+
+ set_table *table = RSET_TABLE(set);
+ size_t n = set_table_size(table);
+ set_iter(set, set_keep_if_i, (st_data_t)table);
+
+ return (n == set_table_size(table)) ? Qnil : set;
+}
+
+/*
+ * call-seq:
+ * replace(enum) -> self
+ *
+ * Replaces the contents of the set with the contents of the given
+ * enumerable object and returns self.
+ *
+ * set = Set[1, 'c', :s] #=> #<Set: {1, "c", :s}>
+ * set.replace([1, 2]) #=> #<Set: {1, 2}>
+ * set #=> #<Set: {1, 2}>
+ */
+static VALUE
+set_i_replace(VALUE set, VALUE other)
+{
+ rb_check_frozen(set);
+
+ if (rb_obj_is_kind_of(other, rb_cSet)) {
+ set_i_initialize_copy(set, other);
+ }
+ else {
+ if (set_iterating_p(set)) {
+ rb_raise(rb_eRuntimeError, "cannot replace set during iteration");
+ }
+
+ // make sure enum is enumerable before calling clear
+ enum_method_id(other);
+
+ set_clear(RSET_TABLE(set));
+ set_merge_enum_into(set, other);
+ }
+
+ return set;
+}
+
+/*
+ * call-seq:
+ * reset -> self
+ *
+ * Resets the internal state after modification to existing elements
+ * and returns self. Elements will be reindexed and deduplicated.
+ */
+static VALUE
+set_i_reset(VALUE set)
+{
+ if (set_iterating_p(set)) {
+ rb_raise(rb_eRuntimeError, "reset during iteration");
+ }
+
+ return set_reset_table_with_type(set, RSET_TABLE(set)->type);
+}
+
+static void set_flatten_merge(VALUE set, VALUE from, VALUE seen);
+
+static int
+set_flatten_merge_i(st_data_t item, st_data_t arg)
+{
+ VALUE *args = (VALUE *)arg;
+ VALUE set = args[0];
+ if (rb_obj_is_kind_of(item, rb_cSet)) {
+ VALUE e_id = rb_obj_id(item);
+ VALUE hash = args[2];
+ switch(rb_hash_aref(hash, e_id)) {
+ case Qfalse:
+ return ST_CONTINUE;
+ case Qtrue:
+ rb_raise(rb_eArgError, "tried to flatten recursive Set");
+ default:
+ break;
+ }
+
+ rb_hash_aset(hash, e_id, Qtrue);
+ set_flatten_merge(set, item, hash);
+ rb_hash_aset(hash, e_id, Qfalse);
+ }
+ else {
+ set_i_add(set, item);
+ }
+ return ST_CONTINUE;
+}
+
+static void
+set_flatten_merge(VALUE set, VALUE from, VALUE hash)
+{
+ VALUE args[3] = {set, from, hash};
+ set_iter(from, set_flatten_merge_i, (st_data_t)args);
+}
+
+/*
+ * call-seq:
+ * flatten -> set
+ *
+ * Returns a new set that is a copy of the set, flattening each
+ * containing set recursively.
+ */
+static VALUE
+set_i_flatten(VALUE set)
+{
+ VALUE new_set = set_s_alloc(rb_obj_class(set));
+ set_flatten_merge(new_set, set, rb_hash_new());
+ return new_set;
+}
+
+static int
+set_contains_set_i(st_data_t item, st_data_t arg)
+{
+ if (rb_obj_is_kind_of(item, rb_cSet)) {
+ *(bool *)arg = true;
+ return ST_STOP;
+ }
+ return ST_CONTINUE;
+}
+
+/*
+ * call-seq:
+ * flatten! -> self
+ *
+ * Equivalent to Set#flatten, but replaces the receiver with the
+ * result in place. Returns nil if no modifications were made.
+ */
+static VALUE
+set_i_flatten_bang(VALUE set)
+{
+ bool contains_set = false;
+ set_iter(set, set_contains_set_i, (st_data_t)&contains_set);
+ if (!contains_set) return Qnil;
+ rb_check_frozen(set);
+ return set_i_replace(set, set_i_flatten(set));
+}
+
+struct set_subset_data {
+ set_table *table;
+ VALUE result;
+};
+
+static int
+set_le_i(st_data_t key, st_data_t arg)
+{
+ struct set_subset_data *data = (struct set_subset_data *)arg;
+ if (set_lookup(data->table, key)) return ST_CONTINUE;
+ data->result = Qfalse;
+ return ST_STOP;
+}
+
+static VALUE
+set_le(VALUE set, VALUE other)
+{
+ struct set_subset_data data = {
+ .table = RSET_TABLE(other),
+ .result = Qtrue
+ };
+ set_iter(set, set_le_i, (st_data_t)&data);
+ return data.result;
+}
+
+/*
+ * call-seq:
+ * proper_subset?(set) -> true or false
+ *
+ * Returns true if the set is a proper subset of the given set.
+ */
+static VALUE
+set_i_proper_subset(VALUE set, VALUE other)
+{
+ check_set(other);
+ if (RSET_SIZE(set) >= RSET_SIZE(other)) return Qfalse;
+ return set_le(set, other);
+}
+
+/*
+ * call-seq:
+ * subset?(set) -> true or false
+ *
+ * Returns true if the set is a subset of the given set.
+ */
+static VALUE
+set_i_subset(VALUE set, VALUE other)
+{
+ check_set(other);
+ if (RSET_SIZE(set) > RSET_SIZE(other)) return Qfalse;
+ return set_le(set, other);
+}
+
+/*
+ * call-seq:
+ * proper_superset?(set) -> true or false
+ *
+ * Returns true if the set is a proper superset of the given set.
+ */
+static VALUE
+set_i_proper_superset(VALUE set, VALUE other)
+{
+ check_set(other);
+ if (RSET_SIZE(set) <= RSET_SIZE(other)) return Qfalse;
+ return set_le(other, set);
+}
+
+/*
+ * call-seq:
+ * superset?(set) -> true or false
+ *
+ * Returns true if the set is a superset of the given set.
+ */
+static VALUE
+set_i_superset(VALUE set, VALUE other)
+{
+ check_set(other);
+ if (RSET_SIZE(set) < RSET_SIZE(other)) return Qfalse;
+ return set_le(other, set);
+}
+
+static int
+set_intersect_i(st_data_t key, st_data_t arg)
+{
+ VALUE *args = (VALUE *)arg;
+ if (set_lookup((set_table *)args[0], key)) {
+ args[1] = Qtrue;
+ return ST_STOP;
+ }
+ return ST_CONTINUE;
+}
+
+/*
+ * call-seq:
+ * intersect?(set) -> true or false
+ *
+ * Returns true if the set and the given enumerable have at least one
+ * element in common.
+ *
+ * Set[1, 2, 3].intersect? Set[4, 5] #=> false
+ * Set[1, 2, 3].intersect? Set[3, 4] #=> true
+ * Set[1, 2, 3].intersect? 4..5 #=> false
+ * Set[1, 2, 3].intersect? [3, 4] #=> true
+ */
+static VALUE
+set_i_intersect(VALUE set, VALUE other)
+{
+ if (rb_obj_is_kind_of(other, rb_cSet)) {
+ size_t set_size = RSET_SIZE(set);
+ size_t other_size = RSET_SIZE(other);
+ VALUE args[2];
+ args[1] = Qfalse;
+ VALUE iter_arg;
+
+ if (set_size < other_size) {
+ iter_arg = set;
+ args[0] = (VALUE)RSET_TABLE(other);
+ }
+ else {
+ iter_arg = other;
+ args[0] = (VALUE)RSET_TABLE(set);
+ }
+ set_iter(iter_arg, set_intersect_i, (st_data_t)args);
+ return args[1];
+ }
+ else if (rb_obj_is_kind_of(other, rb_mEnumerable)) {
+ return rb_funcall(other, id_any_p, 1, set);
+ }
+ else {
+ rb_raise(rb_eArgError, "value must be enumerable");
+ }
+}
+
+/*
+ * call-seq:
+ * disjoint?(set) -> true or false
+ *
+ * Returns true if the set and the given enumerable have no
+ * element in common. This method is the opposite of +intersect?+.
+ *
+ * Set[1, 2, 3].disjoint? Set[3, 4] #=> false
+ * Set[1, 2, 3].disjoint? Set[4, 5] #=> true
+ * Set[1, 2, 3].disjoint? [3, 4] #=> false
+ * Set[1, 2, 3].disjoint? 4..5 #=> true
+ */
+static VALUE
+set_i_disjoint(VALUE set, VALUE other)
+{
+ return RBOOL(!RTEST(set_i_intersect(set, other)));
+}
+
+/*
+ * call-seq:
+ * set <=> other -> -1, 0, 1, or nil
+ *
+ * Returns 0 if the set are equal, -1 / 1 if the set is a
+ * proper subset / superset of the given set, or or nil if
+ * they both have unique elements.
+ */
+static VALUE
+set_i_compare(VALUE set, VALUE other)
+{
+ if (rb_obj_is_kind_of(other, rb_cSet)) {
+ size_t set_size = RSET_SIZE(set);
+ size_t other_size = RSET_SIZE(other);
+
+ if (set_size < other_size) {
+ if (set_le(set, other) == Qtrue) {
+ return INT2NUM(-1);
+ }
+ }
+ else if (set_size > other_size) {
+ if (set_le(other, set) == Qtrue) {
+ return INT2NUM(1);
+ }
+ }
+ else if (set_le(set, other) == Qtrue) {
+ return INT2NUM(0);
+ }
+ }
+
+ return Qnil;
+}
+
+struct set_equal_data {
+ VALUE result;
+ VALUE set;
+};
+
+static int
+set_eql_i(st_data_t item, st_data_t arg)
+{
+ struct set_equal_data *data = (struct set_equal_data *)arg;
+
+ if (!set_lookup(RSET_TABLE(data->set), item)) {
+ data->result = Qfalse;
+ return ST_STOP;
+ }
+ return ST_CONTINUE;
+}
+
+static VALUE
+set_recursive_eql(VALUE set, VALUE dt, int recur)
+{
+ if (recur) return Qtrue;
+ struct set_equal_data *data = (struct set_equal_data*)dt;
+ data->result = Qtrue;
+ set_iter(set, set_eql_i, dt);
+ return data->result;
+}
+
+/*
+ * call-seq:
+ * set == other -> true or false
+ *
+ * Returns true if two sets are equal.
+ */
+static VALUE
+set_i_eq(VALUE set, VALUE other)
+{
+ if (!rb_obj_is_kind_of(other, rb_cSet)) return Qfalse;
+ if (set == other) return Qtrue;
+
+ set_table *stable = RSET_TABLE(set);
+ set_table *otable = RSET_TABLE(other);
+ size_t ssize = set_table_size(stable);
+ size_t osize = set_table_size(otable);
+
+ if (ssize != osize) return Qfalse;
+ if (ssize == 0 && osize == 0) return Qtrue;
+ if (stable->type != otable->type) return Qfalse;
+
+ struct set_equal_data data;
+ data.set = other;
+ return rb_exec_recursive_paired(set_recursive_eql, set, other, (VALUE)&data);
+}
+
+static int
+set_hash_i(st_data_t item, st_data_t(arg))
+{
+ st_index_t *hval = (st_index_t *)arg;
+ st_index_t ival = rb_hash(item);
+ *hval ^= rb_st_hash(&ival, sizeof(st_index_t), 0);
+ return ST_CONTINUE;
+}
+
+/*
+ * call-seq:
+ * hash -> integer
+ *
+ * Returns hash code for set.
+ */
+static VALUE
+set_i_hash(VALUE set)
+{
+ st_index_t size = RSET_SIZE(set);
+ st_index_t hval = rb_st_hash_start(size);
+ hval = rb_hash_uint(hval, (st_index_t)set_i_hash);
+ if (size) {
+ set_iter(set, set_hash_i, (VALUE)&hval);
+ }
+ hval = rb_st_hash_end(hval);
+ return ST2FIX(hval);
+}
+
+/*
+ * Document-class: Set
+ *
+ * Copyright (c) 2002-2024 Akinori MUSHA <[email protected]>
+ *
+ * Documentation by Akinori MUSHA and Gavin Sinclair.
+ *
+ * All rights reserved. You can redistribute and/or modify it under the same
+ * terms as Ruby.
+ *
+ * The Set class implements a collection of unordered values with no
+ * duplicates. It is a hybrid of Array's intuitive inter-operation
+ * facilities and Hash's fast lookup.
+ *
+ * Set is easy to use with Enumerable objects (implementing `each`).
+ * Most of the initializer methods and binary operators accept generic
+ * Enumerable objects besides sets and arrays. An Enumerable object
+ * can be converted to Set using the `to_set` method.
+ *
+ * Set uses a data structure similar to Hash for storage, except that
+ * it only has keys and no values.
+ *
+ * * Equality of elements is determined according to Object#eql? and
+ * Object#hash. Use Set#compare_by_identity to make a set compare
+ * its elements by their identity.
+ * * Set assumes that the identity of each element does not change
+ * while it is stored. Modifying an element of a set will render the
+ * set to an unreliable state.
+ * * When a string is to be stored, a frozen copy of the string is
+ * stored instead unless the original string is already frozen.
+ *
+ * == Comparison
+ *
+ * The comparison operators <tt><</tt>, <tt>></tt>, <tt><=</tt>, and
+ * <tt>>=</tt> are implemented as shorthand for the
+ * {proper_,}{subset?,superset?} methods. The <tt><=></tt>
+ * operator reflects this order, or returns +nil+ for sets that both
+ * have distinct elements (<tt>{x, y}</tt> vs. <tt>{x, z}</tt> for example).
+ *
+ * == Example
+ *
+ * s1 = Set[1, 2] #=> #<Set: {1, 2}>
+ * s2 = [1, 2].to_set #=> #<Set: {1, 2}>
+ * s1 == s2 #=> true
+ * s1.add("foo") #=> #<Set: {1, 2, "foo"}>
+ * s1.merge([2, 6]) #=> #<Set: {1, 2, "foo", 6}>
+ * s1.subset?(s2) #=> false
+ * s2.subset?(s1) #=> true
+ *
+ * == Contact
+ *
+ * - Akinori MUSHA <[email protected]> (current maintainer)
+ *
+ * == What's Here
+ *
+ * First, what's elsewhere. \Class \Set:
+ *
+ * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here].
+ * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here],
+ * which provides dozens of additional methods.
+ *
+ * In particular, class \Set does not have many methods of its own
+ * for fetching or for iterating.
+ * Instead, it relies on those in \Enumerable.
+ *
+ * Here, class \Set provides methods that are useful for:
+ *
+ * - {Creating an Array}[rdoc-ref:Array@Methods+for+Creating+an+Array]
+ * - {Creating a Set}[rdoc-ref:Array@Methods+for+Creating+a+Set]
+ * - {Set Operations}[rdoc-ref:Array@Methods+for+Set+Operations]
+ * - {Comparing}[rdoc-ref:Array@Methods+for+Comparing]
+ * - {Querying}[rdoc-ref:Array@Methods+for+Querying]
+ * - {Assigning}[rdoc-ref:Array@Methods+for+Assigning]
+ * - {Deleting}[rdoc-ref:Array@Methods+for+Deleting]
+ * - {Converting}[rdoc-ref:Array@Methods+for+Converting]
+ * - {Iterating}[rdoc-ref:Array@Methods+for+Iterating]
+ * - {And more....}[rdoc-ref:Array@Other+Methods]
+ *
+ * === Methods for Creating a \Set
+ *
+ * - ::[]:
+ * Returns a new set containing the given objects.
+ * - ::new:
+ * Returns a new set containing either the given objects
+ * (if no block given) or the return values from the called block
+ * (if a block given).
+ *
+ * === Methods for \Set Operations
+ *
+ * - #| (aliased as #union and #+):
+ * Returns a new set containing all elements from +self+
+ * and all elements from a given enumerable (no duplicates).
+ * - #& (aliased as #intersection):
+ * Returns a new set containing all elements common to +self+
+ * and a given enumerable.
+ * - #- (aliased as #difference):
+ * Returns a copy of +self+ with all elements
+ * in a given enumerable removed.
+ * - #^: Returns a new set containing all elements from +self+
+ * and a given enumerable except those common to both.
+ *
+ * === Methods for Comparing
+ *
+ * - #<=>: Returns -1, 0, or 1 as +self+ is less than, equal to,
+ * or greater than a given object.
+ * - #==: Returns whether +self+ and a given enumerable are equal,
+ * as determined by Object#eql?.
+ * - #compare_by_identity?:
+ * Returns whether the set considers only identity
+ * when comparing elements.
+ *
+ * === Methods for Querying
+ *
+ * - #length (aliased as #size):
+ * Returns the count of elements.
+ * - #empty?:
+ * Returns whether the set has no elements.
+ * - #include? (aliased as #member? and #===):
+ * Returns whether a given object is an element in the set.
+ * - #subset? (aliased as #<=):
+ * Returns whether a given object is a subset of the set.
+ * - #proper_subset? (aliased as #<):
+ * Returns whether a given enumerable is a proper subset of the set.
+ * - #superset? (aliased as #>=):
+ * Returns whether a given enumerable is a superset of the set.
+ * - #proper_superset? (aliased as #>):
+ * Returns whether a given enumerable is a proper superset of the set.
+ * - #disjoint?:
+ * Returns +true+ if the set and a given enumerable
+ * have no common elements, +false+ otherwise.
+ * - #intersect?:
+ * Returns +true+ if the set and a given enumerable:
+ * have any common elements, +false+ otherwise.
+ * - #compare_by_identity?:
+ * Returns whether the set considers only identity
+ * when comparing elements.
+ *
+ * === Methods for Assigning
+ *
+ * - #add (aliased as #<<):
+ * Adds a given object to the set; returns +self+.
+ * - #add?:
+ * If the given object is not an element in the set,
+ * adds it and returns +self+; otherwise, returns +nil+.
+ * - #merge:
+ * Merges the elements of each given enumerable object to the set; returns +self+.
+ * - #replace:
+ * Replaces the contents of the set with the contents
+ * of a given enumerable.
+ *
+ * === Methods for Deleting
+ *
+ * - #clear:
+ * Removes all elements in the set; returns +self+.
+ * - #delete:
+ * Removes a given object from the set; returns +self+.
+ * - #delete?:
+ * If the given object is an element in the set,
+ * removes it and returns +self+; otherwise, returns +nil+.
+ * - #subtract:
+ * Removes each given object from the set; returns +self+.
+ * - #delete_if - Removes elements specified by a given block.
+ * - #select! (aliased as #filter!):
+ * Removes elements not specified by a given block.
+ * - #keep_if:
+ * Removes elements not specified by a given block.
+ * - #reject!
+ * Removes elements specified by a given block.
+ *
+ * === Methods for Converting
+ *
+ * - #classify:
+ * Returns a hash that classifies the elements,
+ * as determined by the given block.
+ * - #collect! (aliased as #map!):
+ * Replaces each element with a block return-value.
+ * - #divide:
+ * Returns a hash that classifies the elements,
+ * as determined by the given block;
+ * differs from #classify in that the block may accept
+ * either one or two arguments.
+ * - #flatten:
+ * Returns a new set that is a recursive flattening of +self+.
+ * - #flatten!:
+ * Replaces each nested set in +self+ with the elements from that set.
+ * - #inspect (aliased as #to_s):
+ * Returns a string displaying the elements.
+ * - #join:
+ * Returns a string containing all elements, converted to strings
+ * as needed, and joined by the given record separator.
+ * - #to_a:
+ * Returns an array containing all set elements.
+ * - #to_set:
+ * Returns +self+ if given no arguments and no block;
+ * with a block given, returns a new set consisting of block
+ * return values.
+ *
+ * === Methods for Iterating
+ *
+ * - #each:
+ * Calls the block with each successive element; returns +self+.
+ *
+ * === Other Methods
+ *
+ * - #reset:
+ * Resets the internal state; useful if an object
+ * has been modified while an element in the set.
+ *
+ */
+void
+Init_Set(void)
+{
+ rb_cSet = rb_define_class("Set", rb_cObject);
+ rb_include_module(rb_cSet, rb_mEnumerable);
+
+ id_each_entry = rb_intern_const("each_entry");
+ id_any_p = rb_intern_const("any?");
+ id_new = rb_intern_const("new");
+ id_set_iter_lev = rb_make_internal_id();
+
+ rb_define_alloc_func(rb_cSet, set_s_alloc);
+ rb_define_singleton_method(rb_cSet, "[]", set_s_create, -1);
+
+ rb_define_method(rb_cSet, "initialize", set_i_initialize, -1);
+ rb_define_method(rb_cSet, "initialize_copy", set_i_initialize_copy, 1);
+
+ rb_define_method(rb_cSet, "&", set_i_intersection, 1);
+ rb_define_alias(rb_cSet, "intersection", "&");
+ rb_define_method(rb_cSet, "-", set_i_difference, 1);
+ rb_define_alias(rb_cSet, "difference", "-");
+ rb_define_method(rb_cSet, "^", set_i_xor, 1);
+ rb_define_method(rb_cSet, "|", set_i_union, 1);
+ rb_define_alias(rb_cSet, "+", "|");
+ rb_define_alias(rb_cSet, "union", "|");
+ rb_define_method(rb_cSet, "<=>", set_i_compare, 1);
+ rb_define_method(rb_cSet, "==", set_i_eq, 1);
+ rb_define_alias(rb_cSet, "eql?", "==");
+ rb_define_method(rb_cSet, "add", set_i_add, 1);
+ rb_define_alias(rb_cSet, "<<", "add");
+ rb_define_method(rb_cSet, "add?", set_i_add_p, 1);
+ rb_define_method(rb_cSet, "classify", set_i_classify, 0);
+ rb_define_method(rb_cSet, "clear", set_i_clear, 0);
+ rb_define_method(rb_cSet, "collect!", set_i_collect, 0);
+ rb_define_alias(rb_cSet, "map!", "collect!");
+ rb_define_method(rb_cSet, "compare_by_identity", set_i_compare_by_identity, 0);
+ rb_define_method(rb_cSet, "compare_by_identity?", set_i_compare_by_identity_p, 0);
+ rb_define_method(rb_cSet, "delete", set_i_delete, 1);
+ rb_define_method(rb_cSet, "delete?", set_i_delete_p, 1);
+ rb_define_method(rb_cSet, "delete_if", set_i_delete_if, 0);
+ rb_define_method(rb_cSet, "disjoint?", set_i_disjoint, 1);
+ rb_define_method(rb_cSet, "divide", set_i_divide, 0);
+ rb_define_method(rb_cSet, "each", set_i_each, 0);
+ rb_define_method(rb_cSet, "empty?", set_i_empty, 0);
+ rb_define_method(rb_cSet, "flatten", set_i_flatten, 0);
+ rb_define_method(rb_cSet, "flatten!", set_i_flatten_bang, 0);
+ rb_define_method(rb_cSet, "hash", set_i_hash, 0);
+ rb_define_method(rb_cSet, "include?", set_i_include, 1);
+ rb_define_alias(rb_cSet, "member?", "include?");
+ rb_define_alias(rb_cSet, "===", "include?");
+ rb_define_method(rb_cSet, "inspect", set_i_inspect, 0);
+ rb_define_alias(rb_cSet, "to_s", "inspect");
+ rb_define_method(rb_cSet, "intersect?", set_i_intersect, 1);
+ rb_define_method(rb_cSet, "join", set_i_join, -1);
+ rb_define_method(rb_cSet, "keep_if", set_i_keep_if, 0);
+ rb_define_method(rb_cSet, "merge", set_i_merge, -1);
+ rb_define_method(rb_cSet, "proper_subset?", set_i_proper_subset, 1);
+ rb_define_alias(rb_cSet, "<", "proper_subset?");
+ rb_define_method(rb_cSet, "proper_superset?", set_i_proper_superset, 1);
+ rb_define_alias(rb_cSet, ">", "proper_superset?");
+ rb_define_method(rb_cSet, "reject!", set_i_reject, 0);
+ rb_define_method(rb_cSet, "replace", set_i_replace, 1);
+ rb_define_method(rb_cSet, "reset", set_i_reset, 0);
+ rb_define_method(rb_cSet, "size", set_i_size, 0);
+ rb_define_alias(rb_cSet, "length", "size");
+ rb_define_method(rb_cSet, "select!", set_i_select, 0);
+ rb_define_alias(rb_cSet, "filter!", "select!");
+ rb_define_method(rb_cSet, "subset?", set_i_subset, 1);
+ rb_define_alias(rb_cSet, "<=", "subset?");
+ rb_define_method(rb_cSet, "subtract", set_i_subtract, 1);
+ rb_define_method(rb_cSet, "superset?", set_i_superset, 1);
+ rb_define_alias(rb_cSet, ">=", "superset?");
+ rb_define_method(rb_cSet, "to_a", set_i_to_a, 0);
+ rb_define_method(rb_cSet, "to_set", set_i_to_set, -1);
+
+ rb_provide("set.rb");
+}