Skip to content

Commit daf6caf

Browse files
committed
pv_uni_display: Use common fcn; \b mnemonic
This removes the (almost) duplicate code in this function that displays mnemonics for control characters that have them. The two pieces of code aren't precisely the same because the other function also uses \b as a mnemonic for backspace. Using all possible mnemonics is desirable, so a flag is added that makes pv_uni_display use \b as well. The flag is now enabled by default for double-quoted strings, but not for regex patterns (where \b means something quite different, except in character classes). B.pm is changed to expect \b.
1 parent 5e6ebb1 commit daf6caf

File tree

5 files changed

+29
-32
lines changed

5 files changed

+29
-32
lines changed

ext/B/B.pm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ sub import {
2020
# walkoptree comes from B.xs
2121

2222
BEGIN {
23-
$B::VERSION = '1.78';
23+
$B::VERSION = '1.79';
2424
@B::EXPORT_OK = ();
2525

2626
# Our BOOT code needs $VERSION set, and will append to @EXPORT_OK.

ext/B/B.xs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ cstring(pTHX_ SV *sv, bool perlstyle)
258258
sv_catpvs(sstr, "\\@");
259259
else if (*s == '\\')
260260
{
261-
if (memCHRs("nrftax\\",*(s+1)))
261+
if (memCHRs("nrftabx\\",*(s+1)))
262262
sv_catpvn(sstr, s++, 2);
263263
else
264264
sv_catpvs(sstr, "\\\\");

ext/B/t/b.t

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,6 @@ is(B::opnumber("pp_null"), 0, "Testing opnumber with opname (pp_null)");
290290
while (my ($test, $expect) = splice @tests, 0, 2) {
291291
is(B::perlstring($test), $expect, "B::perlstring($expect)");
292292
utf8::upgrade $test;
293-
$expect =~ s/\\b/sprintf("\\x{%x}", utf8::unicode_to_native(8))/eg;
294293
$expect =~ s/\\([0-7]{3})/sprintf "\\x\{%x\}", oct $1/eg;
295294
is(B::perlstring($test), $expect, "B::perlstring($expect) (Unicode)");
296295
}

utf8.c

Lines changed: 20 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4053,9 +4053,9 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
40534053
/*
40544054
=for apidoc pv_uni_display
40554055
4056-
Build to the scalar C<dsv> a displayable version of the string C<spv>,
4057-
length C<len>, the displayable version being at most C<pvlim> bytes long
4058-
(if longer, the rest is truncated and C<"..."> will be appended).
4056+
Build to the scalar C<dsv> a displayable version of the UTF-8 encoded string
4057+
C<spv>, length C<len>, the displayable version being at most C<pvlim> bytes
4058+
long (if longer, the rest is truncated and C<"..."> will be appended).
40594059
40604060
The C<flags> argument can have C<UNI_DISPLAY_ISPRINT> set to display
40614061
C<isPRINT()>able characters as themselves, C<UNI_DISPLAY_BACKSLASH>
@@ -4064,6 +4064,9 @@ to display the C<\\[nrfta\\]> as the backslashed versions (like C<"\n">)
40644064
C<UNI_DISPLAY_QQ> (and its alias C<UNI_DISPLAY_REGEX>) have both
40654065
C<UNI_DISPLAY_BACKSLASH> and C<UNI_DISPLAY_ISPRINT> turned on.
40664066
4067+
Additionally, there is now C<UNI_DISPLAY_BACKSPACE> which allows C<\b> for a
4068+
backspace, but only when C<UNI_DISPLAY_BACKSLASH> is also set.
4069+
40674070
The pointer to the PV of the C<dsv> is returned.
40684071
40694072
See also L</sv_uni_display>.
@@ -4082,10 +4085,7 @@ Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim,
40824085
SvUTF8_off(dsv);
40834086
for (s = (const char *)spv, e = s + len; s < e; s += UTF8SKIP(s)) {
40844087
UV u;
4085-
/* This serves double duty as a flag and a character to print after
4086-
a \ when flags & UNI_DISPLAY_BACKSLASH is true.
4087-
*/
4088-
char ok = 0;
4088+
bool ok = 0;
40894089

40904090
if (pvlim && SvCUR(dsv) >= pvlim) {
40914091
truncated++;
@@ -4095,27 +4095,19 @@ Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim,
40954095
if (u < 256) {
40964096
const unsigned char c = (unsigned char)u & 0xFF;
40974097
if (flags & UNI_DISPLAY_BACKSLASH) {
4098-
switch (c) {
4099-
case '\n':
4100-
ok = 'n'; break;
4101-
case '\r':
4102-
ok = 'r'; break;
4103-
case '\t':
4104-
ok = 't'; break;
4105-
case '\f':
4106-
ok = 'f'; break;
4107-
case '\a':
4108-
ok = 'a'; break;
4109-
case '\\':
4110-
ok = '\\'; break;
4111-
default: break;
4112-
}
4113-
if (ok) {
4114-
const char string = ok;
4115-
sv_catpvs(dsv, "\\");
4116-
sv_catpvn(dsv, &string, 1);
4117-
}
4118-
}
4098+
if ( isMNEMONIC_CNTRL(c)
4099+
&& ( c != '\b'
4100+
|| (flags & UNI_DISPLAY_BACKSPACE)))
4101+
{
4102+
const char * mnemonic = cntrl_to_mnemonic(c);
4103+
sv_catpvn(dsv, mnemonic, strlen(mnemonic));
4104+
ok = 1;
4105+
}
4106+
else if (c == '\\') {
4107+
sv_catpvs(dsv, "\\\\");
4108+
ok = 1;
4109+
}
4110+
}
41194111
/* isPRINT() is the locale-blind version. */
41204112
if (!ok && (flags & UNI_DISPLAY_ISPRINT) && isPRINT(c)) {
41214113
const char string = c;

utf8.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1009,7 +1009,13 @@ Evaluates to 0xFFFD, the code point of the Unicode REPLACEMENT CHARACTER
10091009

10101010
#define UNI_DISPLAY_ISPRINT 0x0001
10111011
#define UNI_DISPLAY_BACKSLASH 0x0002
1012-
#define UNI_DISPLAY_QQ (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH)
1012+
#define UNI_DISPLAY_BACKSPACE 0x0004 /* Allow \b when also
1013+
UNI_DISPLAY_BACKSLASH is set */
1014+
#define UNI_DISPLAY_QQ (UNI_DISPLAY_ISPRINT \
1015+
|UNI_DISPLAY_BACKSLASH \
1016+
|UNI_DISPLAY_BACKSPACE)
1017+
1018+
/* Character classes could also allow \b, but not patterns in general */
10131019
#define UNI_DISPLAY_REGEX (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH)
10141020

10151021
#define ANYOF_FOLD_SHARP_S(node, input, end) \

0 commit comments

Comments
 (0)