summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nobuyoshi Nakada <[email protected]> 2023-06-09 16:10:30 +0900
committer Nobuyoshi Nakada <[email protected]> 2023-06-09 20:22:30 +0900
commit ab6eb3786c94e69c561080cbb796c2381702a3a4 (patch)
tree 2fe551edd62566939a7760439851fd7ad90b2d13
parent d54f66d1b4ce32d78b526b1ea9e3f213a763d07c (diff)
Optimize `Regexp#dup` and `Regexp.new(/RE/)`
When copying from another regexp, copy the already-built `regex_t` instead of re-compiling its source.
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/7922
-rw-r--r-- benchmark/regexp_dup.yml 6
-rw-r--r-- benchmark/regexp_new.yml 7
-rw-r--r-- include/ruby/onigmo.h 2
-rw-r--r-- re.c 43
-rw-r--r-- regcomp.c 74
5 files changed, 125 insertions, 7 deletions
diff --git a/benchmark/regexp_dup.yml b/benchmark/regexp_dup.yml
new file mode 100644
index 0000000000..52f89991cd
--- /dev/null
+++ b/benchmark/regexp_dup.yml
@@ -0,0 +1,6 @@
+prelude: |
+ str = "a" * 1000
+ re = Regexp.new(str)
+
+benchmark:
+ dup: re.dup
diff --git a/benchmark/regexp_new.yml b/benchmark/regexp_new.yml
new file mode 100644
index 0000000000..bc9ab3ca21
--- /dev/null
+++ b/benchmark/regexp_new.yml
@@ -0,0 +1,7 @@
+prelude: |
+ str = "a" * 1000
+ re = Regexp.new(str)
+
+benchmark:
+ string: Regexp.new(str)
+ regexp: Regexp.new(re)
diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h
index 0a5400c3a5..d233336316 100644
--- a/include/ruby/onigmo.h
+++ b/include/ruby/onigmo.h
@@ -844,6 +844,8 @@ void onig_free(OnigRegex);
ONIG_EXTERN
void onig_free_body(OnigRegex);
ONIG_EXTERN
+int onig_reg_copy(OnigRegex* reg, OnigRegex orig_reg);
+ONIG_EXTERN
OnigPosition onig_scan(OnigRegex reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*), void* callback_arg);
ONIG_EXTERN
OnigPosition onig_search(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option);
diff --git a/re.c b/re.c
index f6abf46131..5fc005552f 100644
--- a/re.c
+++ b/re.c
@@ -3213,6 +3213,15 @@ rb_reg_preprocess_dregexp(VALUE ary, int options)
return result;
}
+static void
+rb_reg_initialize_check(VALUE obj)
+{
+ rb_check_frozen(obj);
+ if (RREGEXP_PTR(obj)) {
+ rb_raise(rb_eTypeError, "already initialized regexp");
+ }
+}
+
static int
rb_reg_initialize(VALUE obj, const char *s, long len, rb_encoding *enc,
int options, onig_errmsg_buffer err,
@@ -3223,10 +3232,7 @@ rb_reg_initialize(VALUE obj, const char *s, long len, rb_encoding *enc,
rb_encoding *fixed_enc = 0;
rb_encoding *a_enc = rb_ascii8bit_encoding();
- rb_check_frozen(obj);
- if (re->ptr)
- rb_raise(rb_eTypeError, "already initialized regexp");
- re->ptr = 0;
+ rb_reg_initialize_check(obj);
if (rb_enc_dummy_p(enc)) {
errcpy(err, "can't make regexp with dummy encoding");
@@ -3862,6 +3868,24 @@ set_timeout(rb_hrtime_t *hrt, VALUE timeout)
double2hrtime(hrt, timeout_d);
}
+static VALUE
+reg_copy(VALUE copy, VALUE orig)
+{
+ int r;
+ regex_t *re;
+
+ rb_reg_initialize_check(copy);
+ if ((r = onig_reg_copy(&re, RREGEXP_PTR(orig))) != 0) {
+ /* ONIGERR_MEMORY only */
+ rb_raise(rb_eRegexpError, "%s", onig_error_code_to_format(r));
+ }
+ RREGEXP_PTR(copy) = re;
+ RB_OBJ_WRITE(copy, &RREGEXP(copy)->src, RREGEXP(orig)->src);
+ RREGEXP_PTR(copy)->timelimit = RREGEXP_PTR(orig)->timelimit;
+ rb_enc_copy(copy, orig);
+ return copy;
+}
+
struct reg_init_args {
VALUE str;
VALUE timeout;
@@ -3931,9 +3955,14 @@ static VALUE
rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
{
struct reg_init_args args;
+ VALUE re = reg_extract_args(argc, argv, &args);
- reg_extract_args(argc, argv, &args);
- reg_init_args(self, args.str, args.enc, args.flags);
+ if (NIL_P(re)) {
+ reg_init_args(self, args.str, args.enc, args.flags);
+ }
+ else {
+ reg_copy(self, re);
+ }
set_timeout(&RREGEXP_PTR(self)->timelimit, args.timeout);
@@ -4356,7 +4385,7 @@ rb_reg_init_copy(VALUE copy, VALUE re)
{
if (!OBJ_INIT_COPY(copy, re)) return copy;
rb_reg_check(re);
- return rb_reg_init_str(copy, RREGEXP_SRC(re), rb_reg_options(re));
+ return reg_copy(copy, re);
}
VALUE
diff --git a/regcomp.c b/regcomp.c
index be85d85f93..b4dbddfa01 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -5671,6 +5671,80 @@ onig_free(regex_t* reg)
}
}
+static void*
+dup_copy(const void *ptr, size_t size)
+{
+ void *newptr = xmalloc(size);
+ if (IS_NOT_NULL(newptr)) {
+ memcpy(newptr, ptr, size);
+ }
+ return newptr;
+}
+
+extern int
+onig_reg_copy(regex_t** nreg, regex_t* oreg)
+{
+ if (IS_NOT_NULL(oreg)) {
+ regex_t *reg = *nreg = (regex_t* )xmalloc(sizeof(regex_t));
+ if (IS_NULL(reg)) return ONIGERR_MEMORY;
+
+ *reg = *oreg;
+
+# define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
+
+ if (IS_NOT_NULL(reg->exact)) {
+ size_t exact_size = reg->exact_end - reg->exact;
+ if (COPY_FAILED(exact, exact_size))
+ goto err;
+ (reg)->exact_end = (reg)->exact + exact_size;
+ }
+
+ if (IS_NOT_NULL(reg->int_map)) {
+ if (COPY_FAILED(int_map, sizeof(int) * ONIG_CHAR_TABLE_SIZE))
+ goto err_int_map;
+ }
+ if (IS_NOT_NULL(reg->int_map_backward)) {
+ if (COPY_FAILED(int_map_backward, sizeof(int) * ONIG_CHAR_TABLE_SIZE))
+ goto err_int_map_backward;
+ }
+ if (IS_NOT_NULL(reg->p)) {
+ if (COPY_FAILED(p, reg->alloc))
+ goto err_p;
+ }
+ if (IS_NOT_NULL(reg->repeat_range)) {
+ if (COPY_FAILED(repeat_range, reg->repeat_range_alloc * sizeof(OnigRepeatRange)))
+ goto err_repeat_range;
+ }
+ if (IS_NOT_NULL(reg->name_table)) {
+ if (IS_NULL(reg->name_table = st_copy(reg->name_table)))
+ goto err_name_table;
+ }
+ if (IS_NOT_NULL(reg->chain)) {
+ if (onig_reg_copy(&reg->chain, reg->chain))
+ goto err_chain;
+ }
+ return 0;
+# undef COPY_FAILED
+
+ err_chain:
+ onig_st_free_table(reg->name_table);
+ err_name_table:
+ xfree(reg->repeat_range);
+ err_repeat_range:
+ xfree(reg->p);
+ err_p:
+ xfree(reg->int_map_backward);
+ err_int_map_backward:
+ xfree(reg->int_map);
+ err_int_map:
+ xfree(reg->exact);
+ err:
+ xfree(reg);
+ return ONIGERR_MEMORY;
+ }
+ return 0;
+}
+
#ifdef RUBY
size_t
onig_memsize(const regex_t *reg)