diff options
author | Nobuyoshi Nakada <[email protected]> | 2023-06-09 16:10:30 +0900 |
---|---|---|
committer | Nobuyoshi Nakada <[email protected]> | 2023-06-09 20:22:30 +0900 |
commit | ab6eb3786c94e69c561080cbb796c2381702a3a4 (patch) | |
tree | 2fe551edd62566939a7760439851fd7ad90b2d13 | |
parent | d54f66d1b4ce32d78b526b1ea9e3f213a763d07c (diff) |
Optimize `Regexp#dup` and `Regexp.new(/RE/)`
When copying from another regexp, copy already built `regex_t` instead
of re-compiling its source.
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/7922
-rw-r--r-- | benchmark/regexp_dup.yml | 6 | ||||
-rw-r--r-- | benchmark/regexp_new.yml | 7 | ||||
-rw-r--r-- | include/ruby/onigmo.h | 2 | ||||
-rw-r--r-- | re.c | 43 | ||||
-rw-r--r-- | regcomp.c | 74 |
5 files changed, 125 insertions, 7 deletions
diff --git a/benchmark/regexp_dup.yml b/benchmark/regexp_dup.yml new file mode 100644 index 0000000000..52f89991cd --- /dev/null +++ b/benchmark/regexp_dup.yml @@ -0,0 +1,6 @@ +prelude: | + str = "a" * 1000 + re = Regexp.new(str) + +benchmark: + dup: re.dup diff --git a/benchmark/regexp_new.yml b/benchmark/regexp_new.yml new file mode 100644 index 0000000000..bc9ab3ca21 --- /dev/null +++ b/benchmark/regexp_new.yml @@ -0,0 +1,7 @@ +prelude: | + str = "a" * 1000 + re = Regexp.new(str) + +benchmark: + string: Regexp.new(str) + regexp: Regexp.new(re) diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index 0a5400c3a5..d233336316 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -844,6 +844,8 @@ void onig_free(OnigRegex); ONIG_EXTERN void onig_free_body(OnigRegex); ONIG_EXTERN +int onig_reg_copy(OnigRegex* reg, OnigRegex orig_reg); +ONIG_EXTERN OnigPosition onig_scan(OnigRegex reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*), void* callback_arg); ONIG_EXTERN OnigPosition onig_search(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option); @@ -3213,6 +3213,15 @@ rb_reg_preprocess_dregexp(VALUE ary, int options) return result; } +static void +rb_reg_initialize_check(VALUE obj) +{ + rb_check_frozen(obj); + if (RREGEXP_PTR(obj)) { + rb_raise(rb_eTypeError, "already initialized regexp"); + } +} + static int rb_reg_initialize(VALUE obj, const char *s, long len, rb_encoding *enc, int options, onig_errmsg_buffer err, @@ -3223,10 +3232,7 @@ rb_reg_initialize(VALUE obj, const char *s, long len, rb_encoding *enc, rb_encoding *fixed_enc = 0; rb_encoding *a_enc = rb_ascii8bit_encoding(); - rb_check_frozen(obj); - if (re->ptr) - rb_raise(rb_eTypeError, "already initialized regexp"); - re->ptr = 0; + rb_reg_initialize_check(obj); if (rb_enc_dummy_p(enc)) { 
errcpy(err, "can't make regexp with dummy encoding"); @@ -3862,6 +3868,24 @@ set_timeout(rb_hrtime_t *hrt, VALUE timeout) double2hrtime(hrt, timeout_d); } +static VALUE +reg_copy(VALUE copy, VALUE orig) +{ + int r; + regex_t *re; + + rb_reg_initialize_check(copy); + if ((r = onig_reg_copy(&re, RREGEXP_PTR(orig))) != 0) { + /* ONIGERR_MEMORY only */ + rb_raise(rb_eRegexpError, "%s", onig_error_code_to_format(r)); + } + RREGEXP_PTR(copy) = re; + RB_OBJ_WRITE(copy, &RREGEXP(copy)->src, RREGEXP(orig)->src); + RREGEXP_PTR(copy)->timelimit = RREGEXP_PTR(orig)->timelimit; + rb_enc_copy(copy, orig); + return copy; +} + struct reg_init_args { VALUE str; VALUE timeout; @@ -3931,9 +3955,14 @@ static VALUE rb_reg_initialize_m(int argc, VALUE *argv, VALUE self) { struct reg_init_args args; + VALUE re = reg_extract_args(argc, argv, &args); - reg_extract_args(argc, argv, &args); - reg_init_args(self, args.str, args.enc, args.flags); + if (NIL_P(re)) { + reg_init_args(self, args.str, args.enc, args.flags); + } + else { + reg_copy(self, re); + } set_timeout(&RREGEXP_PTR(self)->timelimit, args.timeout); @@ -4356,7 +4385,7 @@ rb_reg_init_copy(VALUE copy, VALUE re) { if (!OBJ_INIT_COPY(copy, re)) return copy; rb_reg_check(re); - return rb_reg_init_str(copy, RREGEXP_SRC(re), rb_reg_options(re)); + return reg_copy(copy, re); } VALUE @@ -5671,6 +5671,80 @@ onig_free(regex_t* reg) } } +static void* +dup_copy(const void *ptr, size_t size) +{ + void *newptr = xmalloc(size); + if (IS_NOT_NULL(newptr)) { + memcpy(newptr, ptr, size); + } + return newptr; +} + +extern int +onig_reg_copy(regex_t** nreg, regex_t* oreg) +{ + if (IS_NOT_NULL(oreg)) { + regex_t *reg = *nreg = (regex_t* )xmalloc(sizeof(regex_t)); + if (IS_NULL(reg)) return ONIGERR_MEMORY; + + *reg = *oreg; + +# define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size)) + + if (IS_NOT_NULL(reg->exact)) { + size_t exact_size = reg->exact_end - reg->exact; + if (COPY_FAILED(exact, exact_size)) + goto err; + 
(reg)->exact_end = (reg)->exact + exact_size; + } + + if (IS_NOT_NULL(reg->int_map)) { + if (COPY_FAILED(int_map, sizeof(int) * ONIG_CHAR_TABLE_SIZE)) + goto err_int_map; + } + if (IS_NOT_NULL(reg->int_map_backward)) { + if (COPY_FAILED(int_map_backward, sizeof(int) * ONIG_CHAR_TABLE_SIZE)) + goto err_int_map_backward; + } + if (IS_NOT_NULL(reg->p)) { + if (COPY_FAILED(p, reg->alloc)) + goto err_p; + } + if (IS_NOT_NULL(reg->repeat_range)) { + if (COPY_FAILED(repeat_range, reg->repeat_range_alloc * sizeof(OnigRepeatRange))) + goto err_repeat_range; + } + if (IS_NOT_NULL(reg->name_table)) { + if (IS_NULL(reg->name_table = st_copy(reg->name_table))) + goto err_name_table; + } + if (IS_NOT_NULL(reg->chain)) { + if (onig_reg_copy(&reg->chain, reg->chain)) + goto err_chain; + } + return 0; +# undef COPY_FAILED + + err_chain: + onig_st_free_table(reg->name_table); + err_name_table: + xfree(reg->repeat_range); + err_repeat_range: + xfree(reg->p); + err_p: + xfree(reg->int_map_backward); + err_int_map_backward: + xfree(reg->int_map); + err_int_map: + xfree(reg->exact); + err: + xfree(reg); + return ONIGERR_MEMORY; + } + return 0; +} + #ifdef RUBY size_t onig_memsize(const regex_t *reg) |