Skip to content

Commit c4e8f65

Browse files
committed
Update bundled pcre2 to 10.42
Closes GH-12109.
1 parent d68073c commit c4e8f65

36 files changed

+10010
-3586
lines changed

NEWS

+3
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,9 @@ PHP NEWS
2929
. Fixed persistent procedural ODBC connections not getting closed.
3030
(NattyNarwhal)
3131

32+
- PCRE:
33+
. Update bundled libpcre2 to 10.42. (nielsdos)
34+
3235
- SPL:
3336
. Fixed bug GH-11972 (RecursiveCallbackFilterIterator regression in 8.1.18).
3437
(nielsdos)

ext/pcre/pcre2lib/pcre2.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
4242
/* The current PCRE version information. */
4343

4444
#define PCRE2_MAJOR 10
45-
#define PCRE2_MINOR 40
45+
#define PCRE2_MINOR 42
4646
#define PCRE2_PRERELEASE
47-
#define PCRE2_DATE 2022-04-14
47+
#define PCRE2_DATE 2022-12-12
4848

4949
/* When an application links to a PCRE DLL in Windows, the symbols that are
5050
imported have to be identified as such. When building PCRE2, the appropriate

ext/pcre/pcre2lib/pcre2_compile.c

+10-2
Original file line number | Diff line number | Diff line change
@@ -1266,8 +1266,10 @@ PCRE2_SIZE* ref_count;
12661266

12671267
if (code != NULL)
12681268
{
1269+
#ifdef SUPPORT_JIT
12691270
if (code->executable_jit != NULL)
12701271
PRIV(jit_free)(code->executable_jit, &code->memctl);
1272+
#endif
12711273

12721274
if ((code->flags & PCRE2_DEREF_TABLES) != 0)
12731275
{
@@ -2687,7 +2689,7 @@ if ((options & PCRE2_EXTENDED_MORE) != 0) options |= PCRE2_EXTENDED;
26872689
while (ptr < ptrend)
26882690
{
26892691
int prev_expect_cond_assert;
2690-
uint32_t min_repeat, max_repeat;
2692+
uint32_t min_repeat = 0, max_repeat = 0;
26912693
uint32_t set, unset, *optset;
26922694
uint32_t terminator;
26932695
uint32_t prev_meta_quantifier;
@@ -8552,7 +8554,7 @@ do {
85528554
op == OP_SCBRA || op == OP_SCBRAPOS)
85538555
{
85548556
int n = GET2(scode, 1+LINK_SIZE);
8555-
int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
8557+
unsigned int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
85568558
if (!is_startline(scode, new_map, cb, atomcount, inassert)) return FALSE;
85578559
}
85588560

@@ -10620,4 +10622,10 @@ re = NULL;
1062010622
goto EXIT;
1062110623
}
1062210624

10625+
/* These #undefs are here to enable unity builds with CMake. */
10626+
10627+
#undef NLBLOCK /* Block containing newline information */
10628+
#undef PSSTART /* Field containing processed string start */
10629+
#undef PSEND /* Field containing processed string end */
10630+
1062310631
/* End of pcre2_compile.c */

ext/pcre/pcre2lib/pcre2_context.c

+9-3
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
77
88
Written by Philip Hazel
99
Original API code Copyright (c) 1997-2012 University of Cambridge
10-
New API code Copyright (c) 2016-2018 University of Cambridge
10+
New API code Copyright (c) 2016-2022 University of Cambridge
1111
1212
-----------------------------------------------------------------------------
1313
Redistribution and use in source and binary forms, with or without
@@ -443,8 +443,11 @@ mcontext->offset_limit = limit;
443443
return 0;
444444
}
445445

446-
/* This function became obsolete at release 10.30. It is kept as a synonym for
447-
backwards compatibility. */
446+
/* These functions became obsolete at release 10.30. The first is kept as a
447+
synonym for backwards compatibility. The second now does nothing. Exclude both
448+
from coverage reports. */
449+
450+
/* LCOV_EXCL_START */
448451

449452
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
450453
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
@@ -464,6 +467,9 @@ pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
464467
return 0;
465468
}
466469

470+
/* LCOV_EXCL_STOP */
471+
472+
467473
/* ------------ Convert context ------------ */
468474

469475
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION

ext/pcre/pcre2lib/pcre2_convert.c

+2-3
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
77
88
Written by Philip Hazel
99
Original API code Copyright (c) 1997-2012 University of Cambridge
10-
New API code Copyright (c) 2016-2018 University of Cambridge
10+
New API code Copyright (c) 2016-2022 University of Cambridge
1111
1212
-----------------------------------------------------------------------------
1313
Redistribution and use in source and binary forms, with or without
@@ -65,9 +65,8 @@ POSSIBILITY OF SUCH DAMAGE.
6565
#define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
6666
#define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
6767

68-
/* States for range and POSIX processing */
68+
/* States for POSIX processing */
6969

70-
enum { RANGE_NOT_STARTED, RANGE_STARTING, RANGE_STARTED };
7170
enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET,
7271
POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED };
7372

ext/pcre/pcre2lib/pcre2_dfa_match.c

+9-3
Original file line number | Diff line number | Diff line change
@@ -350,7 +350,7 @@ Returns: the return from the callout
350350
*/
351351

352352
static int
353-
do_callout(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject,
353+
do_callout_dfa(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject,
354354
PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode,
355355
PCRE2_SIZE *lengthptr)
356356
{
@@ -2799,7 +2799,7 @@ for (;;)
27992799
|| code[LINK_SIZE + 1] == OP_CALLOUT_STR)
28002800
{
28012801
PCRE2_SIZE callout_length;
2802-
rrc = do_callout(code, offsets, current_subject, ptr, mb,
2802+
rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb,
28032803
1 + LINK_SIZE, &callout_length);
28042804
if (rrc < 0) return rrc; /* Abandon */
28052805
if (rrc > 0) break; /* Fail this thread */
@@ -3196,7 +3196,7 @@ for (;;)
31963196
case OP_CALLOUT_STR:
31973197
{
31983198
PCRE2_SIZE callout_length;
3199-
rrc = do_callout(code, offsets, current_subject, ptr, mb, 0,
3199+
rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb, 0,
32003200
&callout_length);
32013201
if (rrc < 0) return rrc; /* Abandon */
32023202
if (rrc == 0)
@@ -4057,4 +4057,10 @@ while (rws->next != NULL)
40574057
return rc;
40584058
}
40594059

4060+
/* These #undefs are here to enable unity builds with CMake. */
4061+
4062+
#undef NLBLOCK /* Block containing newline information */
4063+
#undef PSSTART /* Field containing processed string start */
4064+
#undef PSEND /* Field containing processed string end */
4065+
40604066
/* End of pcre2_dfa_match.c */

ext/pcre/pcre2lib/pcre2_internal.h

+8-9
Original file line number | Diff line number | Diff line change
@@ -220,18 +220,17 @@ not rely on this. */
220220

221221
#define COMPILE_ERROR_BASE 100
222222

223-
/* The initial frames vector for remembering backtracking points in
224-
pcre2_match() is allocated on the system stack, of this size (bytes). The size
225-
must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
226-
multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
227-
on the number of capturing parentheses) so 20KiB handles quite a few frames. A
228-
larger vector on the heap is obtained for patterns that need more frames. The
229-
maximum size of this can be limited. */
223+
/* The initial frames vector for remembering pcre2_match() backtracking points
224+
is allocated on the heap, of this size (bytes) or ten times the frame size if
225+
larger, unless the heap limit is smaller. Typical frame sizes are a few hundred
226+
bytes (it depends on the number of capturing parentheses) so 20KiB handles
227+
quite a few frames. A larger vector on the heap is obtained for matches that
228+
need more frames, subject to the heap limit. */
230229

231230
#define START_FRAMES_SIZE 20480
232231

233-
/* Similarly, for DFA matching, an initial internal workspace vector is
234-
allocated on the stack. */
232+
/* For DFA matching, an initial internal workspace vector is allocated on the
233+
stack. The heap is used only if this turns out to be too small. */
235234

236235
#define DFA_START_RWS_SIZE 30720
237236

ext/pcre/pcre2lib/pcre2_intmodedep.h

+17-17
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
77
88
Written by Philip Hazel
99
Original API code Copyright (c) 1997-2012 University of Cambridge
10-
New API code Copyright (c) 2016-2018 University of Cambridge
10+
New API code Copyright (c) 2016-2022 University of Cambridge
1111
1212
-----------------------------------------------------------------------------
1313
Redistribution and use in source and binary forms, with or without
@@ -649,19 +649,23 @@ the size varies from call to call. As the maximum number of capturing
649649
subpatterns is 65535 we must allow for 65536 strings to include the overall
650650
match. (See also the heapframe structure below.) */
651651

652+
struct heapframe; /* Forward reference */
653+
652654
typedef struct pcre2_real_match_data {
653-
pcre2_memctl memctl;
654-
const pcre2_real_code *code; /* The pattern used for the match */
655-
PCRE2_SPTR subject; /* The subject that was matched */
656-
PCRE2_SPTR mark; /* Pointer to last mark */
657-
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
658-
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
659-
PCRE2_SIZE startchar; /* Offset to starting code unit */
660-
uint8_t matchedby; /* Type of match (normal, JIT, DFA) */
661-
uint8_t flags; /* Various flags */
662-
uint16_t oveccount; /* Number of pairs */
663-
int rc; /* The return code from the match */
664-
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
655+
pcre2_memctl memctl; /* Memory control fields */
656+
const pcre2_real_code *code; /* The pattern used for the match */
657+
PCRE2_SPTR subject; /* The subject that was matched */
658+
PCRE2_SPTR mark; /* Pointer to last mark */
659+
struct heapframe *heapframes; /* Backtracking frames heap memory */
660+
PCRE2_SIZE heapframes_size; /* Malloc-ed size */
661+
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
662+
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
663+
PCRE2_SIZE startchar; /* Offset to starting code unit */
664+
uint8_t matchedby; /* Type of match (normal, JIT, DFA) */
665+
uint8_t flags; /* Various flags */
666+
uint16_t oveccount; /* Number of pairs */
667+
int rc; /* The return code from the match */
668+
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
665669
} pcre2_real_match_data;
666670

667671

@@ -854,10 +858,6 @@ doing traditional NFA matching (pcre2_match() and friends). */
854858

855859
typedef struct match_block {
856860
pcre2_memctl memctl; /* For general use */
857-
PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */
858-
heapframe *match_frames; /* Points to vector of frames */
859-
heapframe *match_frames_top; /* Points after the end of the vector */
860-
heapframe *stack_frames; /* The original vector on the stack */
861861
PCRE2_SIZE heap_limit; /* As it says */
862862
uint32_t match_limit; /* As it says */
863863
uint32_t match_limit_depth; /* As it says */

0 commit comments

Comments
 (0)