From 37ed86fd3c798e298fad9db6e7df1f3f45e1e03b Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 8 Jan 2024 10:09:13 -0500 Subject: Fix memory leak in regexp grapheme clusters [Bug #20161] The cc->mbuf gets overwritten, so we need to free it to not leak memory. For example: str = "hello world".encode(Encoding::UTF_32LE) 10.times do 1_000.times do str.grapheme_clusters end puts `ps -o rss= -p #{$$}` end Before: 15536 15760 15920 16144 16304 16480 16640 16784 17008 17280 After: 15584 15584 15760 15824 15888 15888 15888 15888 16048 16112 --- regparse.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'regparse.c') diff --git a/regparse.c b/regparse.c index 1ce15da8f5..57ecd78dee 100644 --- a/regparse.c +++ b/regparse.c @@ -6105,7 +6105,8 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env) R_ERR(add_code_range(&(cc->mbuf), env, 0x000A, 0x000A)); /* CR */ R_ERR(add_code_range(&(cc->mbuf), env, 0x000D, 0x000D)); /* LF */ R_ERR(not_code_range_buf(env->enc, cc->mbuf, &inverted_buf, env)); - cc->mbuf = inverted_buf; /* TODO: check what to do with buffer before inversion */ + bbuf_free(cc->mbuf); + cc->mbuf = inverted_buf; env->warnings_flag &= dup_not_warned; /* TODO: fix false warning */ } -- cgit v1.2.3