diff options
author | NagayamaRyoga <[email protected]> | 2019-12-18 19:26:02 +0900 |
---|---|---|
committer | Alan Wu <[email protected]> | 2020-02-09 11:33:38 -0500 |
commit | 6e5e6a40c4c35aee1cfb7d0effa47354f80baa9e (patch) | |
tree | 380979cd316b881b12cc7d2e5b0c8185bdaa6a63 /compile.c | |
parent | 2079f436c7d4047cb09af005e9f8eb6fbf256000 (diff) |
Deduplicate objects efficiently when dumping iseq to binary
We were inefficient in cases where there are a lot of duplicates due to
the use of linear search. Use a hash table instead.
These cases are not that rare in the wild.
[Feature #16505]
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/2835
Diffstat (limited to 'compile.c')
-rw-r--r-- | compile.c | 41 |
1 files changed, 25 insertions, 16 deletions
@@ -9520,7 +9520,8 @@ struct ibf_header { struct ibf_dump_buffer { VALUE str; - VALUE obj_list; /* [objs] */ + VALUE obj_list; /* [objs] */ + st_table *obj_table; /* obj -> obj number */ }; struct ibf_dump { @@ -9666,25 +9667,26 @@ static VALUE ibf_load_object(const struct ibf_load *load, VALUE object_index); static rb_iseq_t *ibf_load_iseq(const struct ibf_load *load, const rb_iseq_t *index_iseq); static VALUE -ibf_dump_object_list_new(void) +ibf_dump_object_list_new(st_table **obj_table) { VALUE obj_list = rb_ary_tmp_new(1); rb_ary_push(obj_list, Qnil); /* 0th is nil */ + *obj_table = st_init_numtable(); /* need free */ + rb_st_insert(*obj_table, (st_data_t)Qnil, (st_data_t)0); /* 0th is nil */ + return obj_list; } static VALUE ibf_dump_object(struct ibf_dump *dump, VALUE obj) { - VALUE obj_list = dump->current_buffer->obj_list; - long index = RARRAY_LEN(obj_list); - long i; - for (i=0; i<index; i++) { - if (RARRAY_AREF(obj_list, i) == obj) return (VALUE)i; /* dedup */ + int obj_index = ibf_table_lookup(dump->current_buffer->obj_table, (st_data_t)obj); + if (obj_index < 0) { + obj_index = ibf_table_index(dump->current_buffer->obj_table, (st_data_t)obj); + rb_ary_push(dump->current_buffer->obj_list, obj); } - rb_ary_push(obj_list, obj); - return (VALUE)index; + return obj_index; } static VALUE @@ -10372,7 +10374,7 @@ ibf_dump_iseq_each(struct ibf_dump *dump, const rb_iseq_t *iseq) struct ibf_dump_buffer *saved_buffer = dump->current_buffer; struct ibf_dump_buffer buffer; buffer.str = rb_str_new(0, 0); - buffer.obj_list = ibf_dump_object_list_new(); + buffer.obj_list = ibf_dump_object_list_new(&buffer.obj_table); dump->current_buffer = &buffer; #endif @@ -10477,6 +10479,8 @@ ibf_dump_iseq_each(struct ibf_dump *dump, const rb_iseq_t *iseq) ibf_dump_write_small_value(dump, local_obj_list_offset); ibf_dump_write_small_value(dump, local_obj_list_size); + rb_st_free_table(buffer.obj_table); + return offset; #else return body_offset; @@ -10504,15 +10508,15 @@ ibf_load_iseq_each(struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t offset) struct ibf_load_buffer *saved_buffer = load->current_buffer; load->current_buffer = &load->global_buffer; - const ibf_offset_t iseq_start = ibf_load_small_value(load, &reading_pos); - const ibf_offset_t iseq_length_bytes = ibf_load_small_value(load, &reading_pos); - const ibf_offset_t body_offset = ibf_load_small_value(load, &reading_pos); + const ibf_offset_t iseq_start = (ibf_offset_t)ibf_load_small_value(load, &reading_pos); + const ibf_offset_t iseq_length_bytes = (ibf_offset_t)ibf_load_small_value(load, &reading_pos); + const ibf_offset_t body_offset = (ibf_offset_t)ibf_load_small_value(load, &reading_pos); struct ibf_load_buffer buffer; buffer.buff = load->global_buffer.buff + iseq_start; buffer.size = iseq_length_bytes; - buffer.obj_list_offset = ibf_load_small_value(load, &reading_pos); - buffer.obj_list_size = ibf_load_small_value(load, &reading_pos); + buffer.obj_list_offset = (ibf_offset_t)ibf_load_small_value(load, &reading_pos); + buffer.obj_list_size = (ibf_offset_t)ibf_load_small_value(load, &reading_pos); buffer.obj_list = rb_ary_tmp_new(buffer.obj_list_size); rb_ary_resize(buffer.obj_list, buffer.obj_list_size); @@ -11338,6 +11342,10 @@ static void ibf_dump_free(void *ptr) { struct ibf_dump *dump = (struct ibf_dump *)ptr; + if (dump->global_buffer.obj_table) { + st_free_table(dump->global_buffer.obj_table); + dump->global_buffer.obj_table = 0; + } if (dump->iseq_table) { st_free_table(dump->iseq_table); dump->iseq_table = 0; @@ -11351,6 +11359,7 @@ ibf_dump_memsize(const void *ptr) struct ibf_dump *dump = (struct ibf_dump *)ptr; size_t size = sizeof(*dump); if (dump->iseq_table) size += st_memsize(dump->iseq_table); + if (dump->global_buffer.obj_table) size += st_memsize(dump->global_buffer.obj_table); return size; } @@ -11364,7 +11373,7 @@ static void ibf_dump_setup(struct ibf_dump *dump, VALUE dumper_obj) { RB_OBJ_WRITE(dumper_obj, &dump->iseq_list, rb_ary_tmp_new(0)); - RB_OBJ_WRITE(dumper_obj, &dump->global_buffer.obj_list, ibf_dump_object_list_new()); + RB_OBJ_WRITE(dumper_obj, &dump->global_buffer.obj_list, ibf_dump_object_list_new(&dump->global_buffer.obj_table)); RB_OBJ_WRITE(dumper_obj, &dump->global_buffer.str, rb_str_new(0, 0)); dump->iseq_table = st_init_numtable(); /* need free */ |