-
Notifications
You must be signed in to change notification settings - Fork 7.8k
Store JIT/non-JIT regex in a different PCRE cache slot #11396
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
a0fb2f3
to
ddf1700
Compare
99838b0
to
d9879af
Compare
Can this be classified as a bug and landed into PHP 8.1? Currently, when |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've looked into this a little bit closer. One downside is that we always get two different caches, although when disabling the JIT we could re-use regexes that were compiled with JIT support. The match functions are just missing the appropriate checks. Additionally, we'd need to run JIT compilation for regexes that were previously compiled with JIT. That has the unwanted side-effect of repeatedly trying to JIT regexes that fail. Below is a partial implementation for what I'm mentioning here, but I don't think this is worth it considering disabling JIT is not usually something done at runtime. So all things considered, this PR is probably the better approach.
Backporting this should also be fine. Please use concat2
/no concat when compiling without JIT. There's no need for the trailing 0 in that case.
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index 6ad0b6eb76..c892139e9b 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -585,6 +585,32 @@ static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, co
}
/* }}} */
+#ifdef HAVE_PCRE_JIT_SUPPORT
+static zend_result jit_compile(pcre2_code *re, uint32_t *poptions) {
+ int rc;
+ /* Enable PCRE JIT compiler */
+ rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
+ if (EXPECTED(rc >= 0)) {
+ size_t jit_size = 0;
+ if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
+ *poptions |= PREG_JIT;
+ }
+ } else if (rc == PCRE2_ERROR_NOMEMORY) {
+ php_error_docref(NULL, E_WARNING,
+ "Allocation of JIT memory failed, PCRE JIT will be disabled. "
+ "This is likely caused by security restrictions. "
+ "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
+ PCRE_G(jit) = 0;
+ } else {
+ PCRE2_UCHAR error[128];
+ pcre2_get_error_message(rc, error, sizeof(error));
+ php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
+ }
+ return rc;
+}
+#endif
+
/* {{{ pcre_get_compiled_regex_cache */
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
{
@@ -806,24 +832,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
#ifdef HAVE_PCRE_JIT_SUPPORT
if (PCRE_G(jit)) {
- /* Enable PCRE JIT compiler */
- rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
- if (EXPECTED(rc >= 0)) {
- size_t jit_size = 0;
- if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
- poptions |= PREG_JIT;
- }
- } else if (rc == PCRE2_ERROR_NOMEMORY) {
- php_error_docref(NULL, E_WARNING,
- "Allocation of JIT memory failed, PCRE JIT will be disabled. "
- "This is likely caused by security restrictions. "
- "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
- PCRE_G(jit) = 0;
- } else {
- pcre2_get_error_message(rc, error, sizeof(error));
- php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
- pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
- }
+ jit_compile(re, &poptions);
}
#endif
efree(pattern);
@@ -1263,10 +1272,17 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((pce->preg_options & PREG_JIT) && options) {
+ if (options && PCRE_G(jit)) {
+ if (!(pce->preg_options & PREG_JIT)) {
+ jit_compile(pce->re, &pce->preg_options);
+ if (!(pce->preg_options & PREG_JIT)) {
+ goto no_jit;
+ }
+ }
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
+no_jit:
#endif
count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
options, match_data, mctx);
@@ -1405,14 +1421,21 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((pce->preg_options & PREG_JIT)) {
+ if (PCRE_G(jit)) {
if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
break;
}
+ if (!(pce->preg_options & PREG_JIT)) {
+ jit_compile(pce->re, &pce->preg_options);
+ if (!(pce->preg_options & PREG_JIT)) {
+ goto no_jit_offset;
+ }
+ }
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
+no_jit_offset:
#endif
count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
PCRE2_NO_UTF_CHECK, match_data, mctx);
@@ -1625,7 +1648,7 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((pce->preg_options & PREG_JIT) && options) {
+ if ((pce->preg_options & PREG_JIT) && options && PCRE_G(jit)) {
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
@@ -1800,7 +1823,7 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
}
#ifdef HAVE_PCRE_JIT_SUPPORT
- if (pce->preg_options & PREG_JIT) {
+ if ((pce->preg_options & PREG_JIT) && PCRE_G(jit)) {
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
@@ -1881,7 +1904,7 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((pce->preg_options & PREG_JIT) && options) {
+ if ((pce->preg_options & PREG_JIT) && options && PCRE_G(jit)) {
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
@@ -2008,7 +2031,7 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
break;
}
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((pce->preg_options & PREG_JIT)) {
+ if ((pce->preg_options & PREG_JIT) && PCRE_G(jit)) {
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
@@ -2551,7 +2574,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((pce->preg_options & PREG_JIT) && options) {
+ if ((pce->preg_options & PREG_JIT) && options && PCRE_G(jit)) {
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
@@ -2654,7 +2677,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
}
#ifdef HAVE_PCRE_JIT_SUPPORT
- if (pce->preg_options & PREG_JIT) {
+ if ((pce->preg_options & PREG_JIT) && PCRE_G(jit)) {
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
@@ -2894,7 +2917,7 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
/* Perform the match */
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((pce->preg_options & PREG_JIT) && options) {
+ if ((pce->preg_options & PREG_JIT) && options && PCRE_G(jit)) {
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
Yes, switching
@iluuu1994 about zend_string_concat3/zend_string_concat2, I do not append |
I misread that. Either way, |
d9879af
to
e86011c
Compare
done |
@iluuu1994 can this fix be merged? |
Thanks @mvorisek! |
I reverted this commit because it caused a significant performance regression due to the additional allocation in the else branch. I'll try a different solution tomorrow. |
pcre.jit
php.ini option can be changed on runtime, so we must make sure the option is always honored even if the regex string was already cachedas advised in #11374 (comment)
performance impact should be minimal