@@ -5991,35 +5991,44 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
5991
5991
= _new_invlist_C_array(L1PosixAlnum_invlist);
5992
5992
PL_Posix_ptrs[_CC_ALPHANUMERIC]
5993
5993
= _new_invlist_C_array(PosixAlnum_invlist);
5994
+ PL_XPosix_ptrs[_CC_ALPHANUMERIC]
5995
+ = _new_invlist_C_array(XPosixAlnum_invlist);
5994
5996
5995
5997
PL_L1Posix_ptrs[_CC_ALPHA]
5996
5998
= _new_invlist_C_array(L1PosixAlpha_invlist);
5997
5999
PL_Posix_ptrs[_CC_ALPHA] = _new_invlist_C_array(PosixAlpha_invlist);
6000
+ PL_XPosix_ptrs[_CC_ALPHA] = _new_invlist_C_array(XPosixAlpha_invlist);
5998
6001
5999
6002
PL_Posix_ptrs[_CC_BLANK] = _new_invlist_C_array(PosixBlank_invlist);
6000
6003
PL_XPosix_ptrs[_CC_BLANK] = _new_invlist_C_array(XPosixBlank_invlist);
6001
6004
6002
6005
/* Cased is the same as Alpha in the ASCII range */
6003
6006
PL_L1Posix_ptrs[_CC_CASED] = _new_invlist_C_array(L1Cased_invlist);
6004
6007
PL_Posix_ptrs[_CC_CASED] = _new_invlist_C_array(PosixAlpha_invlist);
6008
+ PL_XPosix_ptrs[_CC_CASED] = _new_invlist_C_array(Cased_invlist);
6005
6009
6006
6010
PL_Posix_ptrs[_CC_CNTRL] = _new_invlist_C_array(PosixCntrl_invlist);
6007
6011
PL_XPosix_ptrs[_CC_CNTRL] = _new_invlist_C_array(XPosixCntrl_invlist);
6008
6012
6009
6013
PL_Posix_ptrs[_CC_DIGIT] = _new_invlist_C_array(PosixDigit_invlist);
6010
6014
PL_L1Posix_ptrs[_CC_DIGIT] = _new_invlist_C_array(PosixDigit_invlist);
6015
+ PL_XPosix_ptrs[_CC_DIGIT] = _new_invlist_C_array(XPosixDigit_invlist);
6011
6016
6012
6017
PL_L1Posix_ptrs[_CC_GRAPH] = _new_invlist_C_array(L1PosixGraph_invlist);
6013
6018
PL_Posix_ptrs[_CC_GRAPH] = _new_invlist_C_array(PosixGraph_invlist);
6019
+ PL_XPosix_ptrs[_CC_GRAPH] = _new_invlist_C_array(XPosixGraph_invlist);
6014
6020
6015
6021
PL_L1Posix_ptrs[_CC_LOWER] = _new_invlist_C_array(L1PosixLower_invlist);
6016
6022
PL_Posix_ptrs[_CC_LOWER] = _new_invlist_C_array(PosixLower_invlist);
6023
+ PL_XPosix_ptrs[_CC_LOWER] = _new_invlist_C_array(XPosixLower_invlist);
6017
6024
6018
6025
PL_L1Posix_ptrs[_CC_PRINT] = _new_invlist_C_array(L1PosixPrint_invlist);
6019
6026
PL_Posix_ptrs[_CC_PRINT] = _new_invlist_C_array(PosixPrint_invlist);
6027
+ PL_XPosix_ptrs[_CC_PRINT] = _new_invlist_C_array(XPosixPrint_invlist);
6020
6028
6021
6029
PL_L1Posix_ptrs[_CC_PUNCT] = _new_invlist_C_array(L1PosixPunct_invlist);
6022
6030
PL_Posix_ptrs[_CC_PUNCT] = _new_invlist_C_array(PosixPunct_invlist);
6031
+ PL_XPosix_ptrs[_CC_PUNCT] = _new_invlist_C_array(XPosixPunct_invlist);
6023
6032
6024
6033
PL_Posix_ptrs[_CC_SPACE] = _new_invlist_C_array(PerlSpace_invlist);
6025
6034
PL_XPosix_ptrs[_CC_SPACE] = _new_invlist_C_array(XPerlSpace_invlist);
@@ -6028,12 +6037,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
6028
6037
6029
6038
PL_L1Posix_ptrs[_CC_UPPER] = _new_invlist_C_array(L1PosixUpper_invlist);
6030
6039
PL_Posix_ptrs[_CC_UPPER] = _new_invlist_C_array(PosixUpper_invlist);
6040
+ PL_XPosix_ptrs[_CC_UPPER] = _new_invlist_C_array(XPosixUpper_invlist);
6031
6041
6032
6042
PL_XPosix_ptrs[_CC_VERTSPACE] = _new_invlist_C_array(VertSpace_invlist);
6033
6043
6034
6044
PL_Posix_ptrs[_CC_WORDCHAR] = _new_invlist_C_array(PosixWord_invlist);
6035
6045
PL_L1Posix_ptrs[_CC_WORDCHAR]
6036
6046
= _new_invlist_C_array(L1PosixWord_invlist);
6047
+ PL_XPosix_ptrs[_CC_WORDCHAR] = _new_invlist_C_array(XPosixWord_invlist);
6037
6048
6038
6049
PL_Posix_ptrs[_CC_XDIGIT] = _new_invlist_C_array(PosixXDigit_invlist);
6039
6050
PL_XPosix_ptrs[_CC_XDIGIT] = _new_invlist_C_array(XPosixXDigit_invlist);
@@ -12886,10 +12897,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
12886
12897
#endif
12887
12898
bool invert = FALSE; /* Is this class to be complemented */
12888
12899
12889
- /* Is there any thing like \W or [:^digit:] that matches above the legal
12890
- * Unicode range? */
12891
- bool runtime_posix_matches_above_Unicode = FALSE;
12892
-
12893
12900
bool warn_super = ALWAYS_WARN_SUPER;
12894
12901
12895
12902
regnode * const orig_emit = RExC_emit; /* Save the original RExC_emit in
@@ -13467,124 +13474,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
13467
13474
/* The ascii range inversion list */
13468
13475
SV* ascii_source = PL_Posix_ptrs[classnum];
13469
13476
13470
- /* The full Latin1 range inversion list */
13471
- SV* l1_source = PL_L1Posix_ptrs[classnum];
13472
-
13473
- /* This code is structured into two major clauses. The
13474
- * first is for classes whose complete definitions may not
13475
- * already be known. If not, the Latin1 definition
13476
- * (guaranteed to already known) is used plus code is
13477
- * generated to load the rest at run-time (only if needed).
13478
- * If the complete definition is known, it drops down to
13479
- * the second clause, where the complete definition is
13480
- * known */
13481
-
13482
- if (classnum < _FIRST_NON_SWASH_CC) {
13483
-
13484
- /* Here, the class has a swash, which may or not
13485
- * already be loaded */
13486
-
13487
- /* The name of the property to use to match the full
13488
- * eXtended Unicode range swash for this character
13489
- * class */
13490
- const char *Xname = swash_property_names[classnum];
13491
-
13492
- /* If returning the inversion list, we can't defer
13493
- * getting this until runtime */
13494
- if (ret_invlist && ! PL_utf8_swash_ptrs[classnum]) {
13495
- PL_utf8_swash_ptrs[classnum] =
13496
- _core_swash_init("utf8", Xname, &PL_sv_undef,
13497
- 1, /* binary */
13498
- 0, /* not tr/// */
13499
- NULL, /* No inversion list */
13500
- NULL /* No flags */
13501
- );
13502
- assert(PL_utf8_swash_ptrs[classnum]);
13503
- }
13504
- if ( ! PL_utf8_swash_ptrs[classnum]) {
13505
- if (namedclass % 2 == 0) { /* A non-complemented
13506
- class */
13507
- /* If not /a matching, there are code points we
13508
- * don't know at compile time. Arrange for the
13509
- * unknown matches to be loaded at run-time, if
13510
- * needed */
13511
- if (! AT_LEAST_ASCII_RESTRICTED) {
13512
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::%s\n",
13513
- Xname);
13514
- }
13515
- if (LOC) { /* Under locale, set run-time
13516
- lookup */
13517
- ANYOF_POSIXL_SET(ret, namedclass);
13518
- }
13519
- else {
13520
- /* Add the current class's code points to
13521
- * the running total */
13522
- _invlist_union(posixes,
13523
- (AT_LEAST_ASCII_RESTRICTED)
13524
- ? ascii_source
13525
- : l1_source,
13526
- &posixes);
13527
- }
13528
- }
13529
- else { /* A complemented class */
13530
- if (AT_LEAST_ASCII_RESTRICTED) {
13531
- /* Under /a should match everything above
13532
- * ASCII, plus the complement of the set's
13533
- * ASCII matches */
13534
- _invlist_union_complement_2nd(posixes,
13535
- ascii_source,
13536
- &posixes);
13537
- }
13538
- else {
13539
- /* Arrange for the unknown matches to be
13540
- * loaded at run-time, if needed */
13541
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::%s\n",
13542
- Xname);
13543
- runtime_posix_matches_above_Unicode = TRUE;
13544
- if (LOC) {
13545
- ANYOF_POSIXL_SET(ret, namedclass);
13546
- }
13547
- else {
13548
-
13549
- /* We want to match everything in
13550
- * Latin1, except those things that
13551
- * l1_source matches */
13552
- SV* scratch_list = NULL;
13553
- _invlist_subtract(PL_Latin1, l1_source,
13554
- &scratch_list);
13555
-
13556
- /* Add the list from this class to the
13557
- * running total */
13558
- if (! posixes) {
13559
- posixes = scratch_list;
13560
- }
13561
- else {
13562
- _invlist_union(posixes,
13563
- scratch_list,
13564
- &posixes);
13565
- SvREFCNT_dec_NN(scratch_list);
13566
- }
13567
- if (DEPENDS_SEMANTICS) {
13568
- ANYOF_FLAGS(ret)
13569
- |= ANYOF_NON_UTF8_LATIN1_ALL;
13570
- }
13571
- }
13572
- }
13573
- }
13574
- goto namedclass_done;
13575
- }
13576
-
13577
- /* Here, there is a swash loaded for the class. If no
13578
- * inversion list for it yet, get it */
13579
- if (! PL_XPosix_ptrs[classnum]) {
13580
- PL_XPosix_ptrs[classnum]
13581
- = _swash_to_invlist(PL_utf8_swash_ptrs[classnum]);
13582
- }
13583
- }
13584
-
13585
- /* Here there is an inversion list already loaded for the
13586
- * entire class */
13587
-
13588
13477
if (namedclass % 2 == 0) { /* A non-complemented class,
13589
13478
like ANYOF_PUNCT */
13590
13479
if (! LOC) {
@@ -13680,7 +13569,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
13680
13569
}
13681
13570
}
13682
13571
}
13683
- namedclass_done:
13684
13572
continue; /* Go get next character */
13685
13573
}
13686
13574
} /* end of namedclass \blah */
@@ -14431,14 +14319,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
14431
14319
* are using above-Unicode code points indicates they should know
14432
14320
* the issues involved */
14433
14321
if (warn_super) {
14434
- bool non_prop_matches_above_Unicode =
14435
- runtime_posix_matches_above_Unicode
14436
- | (invlist_highest(cp_list) > PERL_UNICODE_MAX);
14437
- if (invert) {
14438
- non_prop_matches_above_Unicode =
14439
- ! non_prop_matches_above_Unicode;
14440
- }
14441
- warn_super = ! non_prop_matches_above_Unicode;
14322
+ warn_super = ! (invert ^ (invlist_highest(cp_list) > PERL_UNICODE_MAX));
14442
14323
}
14443
14324
14444
14325
_invlist_union(properties, cp_list, &cp_list);
0 commit comments