Skip to content

Commit f97f25e

Browse files
gh-76511: Fix email.Message.as_string() for non-ASCII message with ASCII charset (GH-116125)
1 parent df59401 commit f97f25e

File tree

4 files changed

+21
-2
lines changed

4 files changed

+21
-2
lines changed

Lib/email/generator.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ def _handle_text(self, msg):
243243
# existing message.
244244
msg = deepcopy(msg)
245245
del msg['content-transfer-encoding']
246-
msg.set_payload(payload, charset)
246+
msg.set_payload(msg._payload, charset)
247247
payload = msg.get_payload()
248248
self._munge_cte = (msg['content-transfer-encoding'],
249249
msg['content-type'])

Lib/email/message.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ def set_payload(self, payload, charset=None):
340340
return
341341
if not isinstance(charset, Charset):
342342
charset = Charset(charset)
343-
payload = payload.encode(charset.output_charset)
343+
payload = payload.encode(charset.output_charset, 'surrogateescape')
344344
if hasattr(payload, 'decode'):
345345
self._payload = payload.decode('ascii', 'surrogateescape')
346346
else:

Lib/test/test_email/test_email.py

+15
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,21 @@ def test_nonascii_as_string_without_cte(self):
337337
msg = email.message_from_bytes(source)
338338
self.assertEqual(msg.as_string(), expected)
339339

340+
def test_nonascii_as_string_with_ascii_charset(self):
341+
m = textwrap.dedent("""\
342+
MIME-Version: 1.0
343+
Content-type: text/plain; charset="us-ascii"
344+
Content-Transfer-Encoding: 8bit
345+
346+
Test if non-ascii messages with no Content-Transfer-Encoding set
347+
can be as_string'd:
348+
Föö bär
349+
""")
350+
source = m.encode('iso-8859-1')
351+
expected = source.decode('ascii', 'replace')
352+
msg = email.message_from_bytes(source)
353+
self.assertEqual(msg.as_string(), expected)
354+
340355
def test_nonascii_as_string_without_content_type_and_cte(self):
341356
m = textwrap.dedent("""\
342357
MIME-Version: 1.0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix UnicodeEncodeError in :meth:`email.message.Message.as_string` that results when
2+
a message that claims to be in the ASCII character set actually contains non-ASCII
3+
characters. Non-ASCII characters are now replaced with the U+FFFD replacement
4+
character, like in the ``replace`` error handler.

0 commit comments

Comments
 (0)