Skip to content

Commit bcb8752

Browse files
author
Karl Williamson
committed
regcomp.c: Rmv code for delayed 'til runtime POSIX defns
The previous commit made compile-time inversion lists available for all POSIX classes, not just some. Therefore the code that deals with not having them available until runtime can be removed. This commit removes the largest chunk of that code, the part used when a POSIX class appears within a bracketed character class. Later commits will remove more.
1 parent c496e78 commit bcb8752

File tree

1 file changed

+12
-131
lines changed

1 file changed

+12
-131
lines changed

regcomp.c

+12-131
Original file line numberDiff line numberDiff line change
@@ -5991,35 +5991,44 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
59915991
= _new_invlist_C_array(L1PosixAlnum_invlist);
59925992
PL_Posix_ptrs[_CC_ALPHANUMERIC]
59935993
= _new_invlist_C_array(PosixAlnum_invlist);
5994+
PL_XPosix_ptrs[_CC_ALPHANUMERIC]
5995+
= _new_invlist_C_array(XPosixAlnum_invlist);
59945996

59955997
PL_L1Posix_ptrs[_CC_ALPHA]
59965998
= _new_invlist_C_array(L1PosixAlpha_invlist);
59975999
PL_Posix_ptrs[_CC_ALPHA] = _new_invlist_C_array(PosixAlpha_invlist);
6000+
PL_XPosix_ptrs[_CC_ALPHA] = _new_invlist_C_array(XPosixAlpha_invlist);
59986001

59996002
PL_Posix_ptrs[_CC_BLANK] = _new_invlist_C_array(PosixBlank_invlist);
60006003
PL_XPosix_ptrs[_CC_BLANK] = _new_invlist_C_array(XPosixBlank_invlist);
60016004

60026005
/* Cased is the same as Alpha in the ASCII range */
60036006
PL_L1Posix_ptrs[_CC_CASED] = _new_invlist_C_array(L1Cased_invlist);
60046007
PL_Posix_ptrs[_CC_CASED] = _new_invlist_C_array(PosixAlpha_invlist);
6008+
PL_XPosix_ptrs[_CC_CASED] = _new_invlist_C_array(Cased_invlist);
60056009

60066010
PL_Posix_ptrs[_CC_CNTRL] = _new_invlist_C_array(PosixCntrl_invlist);
60076011
PL_XPosix_ptrs[_CC_CNTRL] = _new_invlist_C_array(XPosixCntrl_invlist);
60086012

60096013
PL_Posix_ptrs[_CC_DIGIT] = _new_invlist_C_array(PosixDigit_invlist);
60106014
PL_L1Posix_ptrs[_CC_DIGIT] = _new_invlist_C_array(PosixDigit_invlist);
6015+
PL_XPosix_ptrs[_CC_DIGIT] = _new_invlist_C_array(XPosixDigit_invlist);
60116016

60126017
PL_L1Posix_ptrs[_CC_GRAPH] = _new_invlist_C_array(L1PosixGraph_invlist);
60136018
PL_Posix_ptrs[_CC_GRAPH] = _new_invlist_C_array(PosixGraph_invlist);
6019+
PL_XPosix_ptrs[_CC_GRAPH] = _new_invlist_C_array(XPosixGraph_invlist);
60146020

60156021
PL_L1Posix_ptrs[_CC_LOWER] = _new_invlist_C_array(L1PosixLower_invlist);
60166022
PL_Posix_ptrs[_CC_LOWER] = _new_invlist_C_array(PosixLower_invlist);
6023+
PL_XPosix_ptrs[_CC_LOWER] = _new_invlist_C_array(XPosixLower_invlist);
60176024

60186025
PL_L1Posix_ptrs[_CC_PRINT] = _new_invlist_C_array(L1PosixPrint_invlist);
60196026
PL_Posix_ptrs[_CC_PRINT] = _new_invlist_C_array(PosixPrint_invlist);
6027+
PL_XPosix_ptrs[_CC_PRINT] = _new_invlist_C_array(XPosixPrint_invlist);
60206028

60216029
PL_L1Posix_ptrs[_CC_PUNCT] = _new_invlist_C_array(L1PosixPunct_invlist);
60226030
PL_Posix_ptrs[_CC_PUNCT] = _new_invlist_C_array(PosixPunct_invlist);
6031+
PL_XPosix_ptrs[_CC_PUNCT] = _new_invlist_C_array(XPosixPunct_invlist);
60236032

60246033
PL_Posix_ptrs[_CC_SPACE] = _new_invlist_C_array(PerlSpace_invlist);
60256034
PL_XPosix_ptrs[_CC_SPACE] = _new_invlist_C_array(XPerlSpace_invlist);
@@ -6028,12 +6037,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
60286037

60296038
PL_L1Posix_ptrs[_CC_UPPER] = _new_invlist_C_array(L1PosixUpper_invlist);
60306039
PL_Posix_ptrs[_CC_UPPER] = _new_invlist_C_array(PosixUpper_invlist);
6040+
PL_XPosix_ptrs[_CC_UPPER] = _new_invlist_C_array(XPosixUpper_invlist);
60316041

60326042
PL_XPosix_ptrs[_CC_VERTSPACE] = _new_invlist_C_array(VertSpace_invlist);
60336043

60346044
PL_Posix_ptrs[_CC_WORDCHAR] = _new_invlist_C_array(PosixWord_invlist);
60356045
PL_L1Posix_ptrs[_CC_WORDCHAR]
60366046
= _new_invlist_C_array(L1PosixWord_invlist);
6047+
PL_XPosix_ptrs[_CC_WORDCHAR] = _new_invlist_C_array(XPosixWord_invlist);
60376048

60386049
PL_Posix_ptrs[_CC_XDIGIT] = _new_invlist_C_array(PosixXDigit_invlist);
60396050
PL_XPosix_ptrs[_CC_XDIGIT] = _new_invlist_C_array(XPosixXDigit_invlist);
@@ -12886,10 +12897,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
1288612897
#endif
1288712898
bool invert = FALSE; /* Is this class to be complemented */
1288812899

12889-
/* Is there any thing like \W or [:^digit:] that matches above the legal
12890-
* Unicode range? */
12891-
bool runtime_posix_matches_above_Unicode = FALSE;
12892-
1289312900
bool warn_super = ALWAYS_WARN_SUPER;
1289412901

1289512902
regnode * const orig_emit = RExC_emit; /* Save the original RExC_emit in
@@ -13467,124 +13474,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
1346713474
/* The ascii range inversion list */
1346813475
SV* ascii_source = PL_Posix_ptrs[classnum];
1346913476

13470-
/* The full Latin1 range inversion list */
13471-
SV* l1_source = PL_L1Posix_ptrs[classnum];
13472-
13473-
/* This code is structured into two major clauses. The
13474-
* first is for classes whose complete definitions may not
13475-
* already be known. If not, the Latin1 definition
13476-
* (guaranteed to already known) is used plus code is
13477-
* generated to load the rest at run-time (only if needed).
13478-
* If the complete definition is known, it drops down to
13479-
* the second clause, where the complete definition is
13480-
* known */
13481-
13482-
if (classnum < _FIRST_NON_SWASH_CC) {
13483-
13484-
/* Here, the class has a swash, which may or not
13485-
* already be loaded */
13486-
13487-
/* The name of the property to use to match the full
13488-
* eXtended Unicode range swash for this character
13489-
* class */
13490-
const char *Xname = swash_property_names[classnum];
13491-
13492-
/* If returning the inversion list, we can't defer
13493-
* getting this until runtime */
13494-
if (ret_invlist && ! PL_utf8_swash_ptrs[classnum]) {
13495-
PL_utf8_swash_ptrs[classnum] =
13496-
_core_swash_init("utf8", Xname, &PL_sv_undef,
13497-
1, /* binary */
13498-
0, /* not tr/// */
13499-
NULL, /* No inversion list */
13500-
NULL /* No flags */
13501-
);
13502-
assert(PL_utf8_swash_ptrs[classnum]);
13503-
}
13504-
if ( ! PL_utf8_swash_ptrs[classnum]) {
13505-
if (namedclass % 2 == 0) { /* A non-complemented
13506-
class */
13507-
/* If not /a matching, there are code points we
13508-
* don't know at compile time. Arrange for the
13509-
* unknown matches to be loaded at run-time, if
13510-
* needed */
13511-
if (! AT_LEAST_ASCII_RESTRICTED) {
13512-
Perl_sv_catpvf(aTHX_ listsv, "+utf8::%s\n",
13513-
Xname);
13514-
}
13515-
if (LOC) { /* Under locale, set run-time
13516-
lookup */
13517-
ANYOF_POSIXL_SET(ret, namedclass);
13518-
}
13519-
else {
13520-
/* Add the current class's code points to
13521-
* the running total */
13522-
_invlist_union(posixes,
13523-
(AT_LEAST_ASCII_RESTRICTED)
13524-
? ascii_source
13525-
: l1_source,
13526-
&posixes);
13527-
}
13528-
}
13529-
else { /* A complemented class */
13530-
if (AT_LEAST_ASCII_RESTRICTED) {
13531-
/* Under /a should match everything above
13532-
* ASCII, plus the complement of the set's
13533-
* ASCII matches */
13534-
_invlist_union_complement_2nd(posixes,
13535-
ascii_source,
13536-
&posixes);
13537-
}
13538-
else {
13539-
/* Arrange for the unknown matches to be
13540-
* loaded at run-time, if needed */
13541-
Perl_sv_catpvf(aTHX_ listsv, "!utf8::%s\n",
13542-
Xname);
13543-
runtime_posix_matches_above_Unicode = TRUE;
13544-
if (LOC) {
13545-
ANYOF_POSIXL_SET(ret, namedclass);
13546-
}
13547-
else {
13548-
13549-
/* We want to match everything in
13550-
* Latin1, except those things that
13551-
* l1_source matches */
13552-
SV* scratch_list = NULL;
13553-
_invlist_subtract(PL_Latin1, l1_source,
13554-
&scratch_list);
13555-
13556-
/* Add the list from this class to the
13557-
* running total */
13558-
if (! posixes) {
13559-
posixes = scratch_list;
13560-
}
13561-
else {
13562-
_invlist_union(posixes,
13563-
scratch_list,
13564-
&posixes);
13565-
SvREFCNT_dec_NN(scratch_list);
13566-
}
13567-
if (DEPENDS_SEMANTICS) {
13568-
ANYOF_FLAGS(ret)
13569-
|= ANYOF_NON_UTF8_LATIN1_ALL;
13570-
}
13571-
}
13572-
}
13573-
}
13574-
goto namedclass_done;
13575-
}
13576-
13577-
/* Here, there is a swash loaded for the class. If no
13578-
* inversion list for it yet, get it */
13579-
if (! PL_XPosix_ptrs[classnum]) {
13580-
PL_XPosix_ptrs[classnum]
13581-
= _swash_to_invlist(PL_utf8_swash_ptrs[classnum]);
13582-
}
13583-
}
13584-
13585-
/* Here there is an inversion list already loaded for the
13586-
* entire class */
13587-
1358813477
if (namedclass % 2 == 0) { /* A non-complemented class,
1358913478
like ANYOF_PUNCT */
1359013479
if (! LOC) {
@@ -13680,7 +13569,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
1368013569
}
1368113570
}
1368213571
}
13683-
namedclass_done:
1368413572
continue; /* Go get next character */
1368513573
}
1368613574
} /* end of namedclass \blah */
@@ -14431,14 +14319,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
1443114319
* are using above-Unicode code points indicates they should know
1443214320
* the issues involved */
1443314321
if (warn_super) {
14434-
bool non_prop_matches_above_Unicode =
14435-
runtime_posix_matches_above_Unicode
14436-
| (invlist_highest(cp_list) > PERL_UNICODE_MAX);
14437-
if (invert) {
14438-
non_prop_matches_above_Unicode =
14439-
! non_prop_matches_above_Unicode;
14440-
}
14441-
warn_super = ! non_prop_matches_above_Unicode;
14322+
warn_super = ! (invert ^ (invlist_highest(cp_list) > PERL_UNICODE_MAX));
1444214323
}
1444314324

1444414325
_invlist_union(properties, cp_list, &cp_list);

0 commit comments

Comments
 (0)