]> perl5.git.perl.org Git - perl5.git/commitdiff This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
safer cleanup when failing to compile regexps
authorTony Cook <[email protected]>
Wed, 29 Nov 2023 04:51:17 +0000 (15:51 +1100)
committermauke <[email protected]>
Sun, 4 Aug 2024 12:02:07 +0000 (14:02 +0200)
Prior to this commit when producing a warning the regexp compiler
would check if the warning category was marked as FATAL, and if it was
it would add clean up to the save stack to release buffers used during
compilation and to release the working REGEXP SV.

This causes two type of problems:

- if an error was already queued, Perl_ck_warner() returns even if
  the warning is fatal, this meant that the normal clean up code
  Perl_re_op_compile() would also run, resulting in a double free
  of the buffers.

- without fatal warnings, if a $SIG{__WARN__} handler died, the
  buffers and the working REGEXP SV would leak.

Avoid this by using SAVEDESTRUCTOR_X() to release the memory and
optionally the SV at the end of scope.

Fixes #21661

regcomp.c
regcomp_internal.h

index e7aefb79c78fd071ffa0be676378c8bad605433b..1c95bb780c15e00f57184218afd2bffb88fbc024 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -1356,6 +1356,24 @@ S_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc)
     return TRUE;
 }
 
+static void
+release_RExC_state(pTHX_ void *vstate) {
+    RExC_state_t *pRExC_state = (RExC_state_t *)vstate;
+
+    /* Any or all of these might be NULL.
+
+       There's no point in setting them to NULL after the free, since
+       pRExC_state is about to be released.
+     */
+    SvREFCNT_dec(RExC_rx_sv);
+    Safefree(RExC_open_parens);
+    Safefree(RExC_close_parens);
+    Safefree(RExC_logical_to_parno);
+    Safefree(RExC_parno_to_logical);
+
+    Safefree(pRExC_state);
+}
+
 /*
  * Perl_re_op_compile - the perl internal RE engine's function to compile a
  * regular expression into internal code.
@@ -1437,8 +1455,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     bool recompile = 0;
     bool runtime_code = 0;
     scan_data_t data;
-    RExC_state_t RExC_state;
-    RExC_state_t * const pRExC_state = &RExC_state;
 
 #ifdef TRIE_STUDY_OPT
     /* search for "restudy" in this file for a detailed explanation */
@@ -1449,25 +1465,28 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
     PERL_ARGS_ASSERT_RE_OP_COMPILE;
 
+    DEBUG_r(if (!PL_colorset) reginitcolors());
+
+    RExC_state_t *pRExC_state = NULL;
     /* Ensure that all members of the pRExC_state is initialized to 0
      * at the start of regex compilation. Historically we have had issues
      * with people remembering to zero specific members or zeroing them
      * too late, etc. Doing it in one place is saner and avoid oversight
      * or error. */
-    Zero(pRExC_state,1,RExC_state_t);
+    Newxz(pRExC_state, 1, RExC_state_t);
+
+    SAVEDESTRUCTOR_X(release_RExC_state, pRExC_state);
+
     DEBUG_r({
         /* and then initialize RExC_mysv1 and RExC_mysv2 early so if
          * something calls regprop we don't have issues. These variables
-         * not being set up properly motivated the use of Zero() to initalize
+         * not being set up properly motivated the use of Newxz() to initalize
          * the pRExC_state structure, as there were codepaths under -Uusedl
          * that left these unitialized, and non-null as well. */
         RExC_mysv1 = sv_newmortal();
         RExC_mysv2 = sv_newmortal();
     });
 
-    DEBUG_r(if (!PL_colorset) reginitcolors());
-
-
     if (is_bare_re)
         *is_bare_re = FALSE;
 
@@ -1840,6 +1859,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
         /* Clean up what we did in this parse */
         SvREFCNT_dec_NN(RExC_rx_sv);
+        RExC_rx_sv = NULL;
 
         goto redo_parse;
     }
@@ -1915,12 +1935,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     /* search for "restudy" in this file for a detailed explanation */
     if (!restudied) {
         StructCopy(&zero_scan_data, &data, scan_data_t);
-        copyRExC_state = RExC_state;
+        copyRExC_state = *pRExC_state;
     } else {
         U32 seen=RExC_seen;
         DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "Restudying\n"));
 
-        RExC_state = copyRExC_state;
+        *pRExC_state = copyRExC_state;
         if (seen & REG_TOP_LEVEL_BRANCHES_SEEN)
             RExC_seen |= REG_TOP_LEVEL_BRANCHES_SEEN;
         else
@@ -2439,22 +2459,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         regdump(RExC_rx);
     });
 
-    if (RExC_open_parens) {
-        Safefree(RExC_open_parens);
-        RExC_open_parens = NULL;
-    }
-    if (RExC_close_parens) {
-        Safefree(RExC_close_parens);
-        RExC_close_parens = NULL;
-    }
-    if (RExC_logical_to_parno) {
-        Safefree(RExC_logical_to_parno);
-        RExC_logical_to_parno = NULL;
-    }
-    if (RExC_parno_to_logical) {
-        Safefree(RExC_parno_to_logical);
-        RExC_parno_to_logical = NULL;
-    }
+    /* we're returning ownership of the SV to the caller, ensure the cleanup
+     * doesn't release it
+     */
+    RExC_rx_sv = NULL;
 
 #ifdef USE_ITHREADS
     /* under ithreads the ?pat? PMf_USED flag on the pmop is simulated
@@ -9344,7 +9352,6 @@ S_output_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings)
                                             array is mortal, but is a
                                             fail-safe */
             (void) sv_2mortal(msg);
-            PREPARE_TO_DIE;
         }
         Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s", SvPVX(msg));
         SvREFCNT_dec_NN(msg);
index b8db3c15f1aae20d9ea533042069e590b2f26923..b33fa1ac51a5e03af7a50e889fe7a3b88a2fbc92 100644 (file)
@@ -894,7 +894,6 @@ static const scan_data_t zero_scan_data = {
     const char *ellipses = "";                                          \
     IV len = RExC_precomp_end - RExC_precomp;                           \
                                                                         \
-    PREPARE_TO_DIE;                                                     \
     if (len > RegexLengthToShowInErrorMessages) {                       \
         /* chop 10 shorter than the max, to ensure meaning of "..." */  \
         len = RegexLengthToShowInErrorMessages - 10;                    \
@@ -927,7 +926,6 @@ static const scan_data_t zero_scan_data = {
  * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL()
  */
 #define vFAIL(m) STMT_START {                           \
-    PREPARE_TO_DIE;                                     \
     Simple_vFAIL(m);                                    \
 } STMT_END
 
@@ -943,7 +941,6 @@ static const scan_data_t zero_scan_data = {
  * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL2().
  */
 #define vFAIL2(m,a1) STMT_START {                       \
-    PREPARE_TO_DIE;                                     \
     Simple_vFAIL2(m, a1);                               \
 } STMT_END
 
@@ -960,7 +957,6 @@ static const scan_data_t zero_scan_data = {
  * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL3().
  */
 #define vFAIL3(m,a1,a2) STMT_START {                    \
-    PREPARE_TO_DIE;                                     \
     Simple_vFAIL3(m, a1, a2);                           \
 } STMT_END
 
@@ -973,19 +969,16 @@ static const scan_data_t zero_scan_data = {
 } STMT_END
 
 #define vFAIL4(m,a1,a2,a3) STMT_START {                 \
-    PREPARE_TO_DIE;                                     \
     Simple_vFAIL4(m, a1, a2, a3);                       \
 } STMT_END
 
 /* A specialized version of vFAIL2 that works with UTF8f */
 #define vFAIL2utf8f(m, a1) STMT_START {             \
-    PREPARE_TO_DIE;                                 \
     S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1,  \
             REPORT_LOCATION_ARGS(RExC_parse));      \
 } STMT_END
 
 #define vFAIL3utf8f(m, a1, a2) STMT_START {             \
-    PREPARE_TO_DIE;                                     \
     S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2,  \
             REPORT_LOCATION_ARGS(RExC_parse));          \
 } STMT_END
@@ -1026,8 +1019,6 @@ static const scan_data_t zero_scan_data = {
                               __FILE__, __LINE__, loc);                 \
         }                                                               \
         if (TO_OUTPUT_WARNINGS(loc)) {                                  \
-            if (ckDEAD(warns))                                          \
-                PREPARE_TO_DIE;                                         \
             code;                                                       \
             UPDATE_WARNINGS_LOC(loc);                                   \
         }                                                               \