gh-76511: Fix email.Message.as_string() for non-ASCII message with ASCII charset (GH-116125)

This commit is contained in:
Serhiy Storchaka 2024-03-05 17:49:01 +02:00 committed by GitHub
parent df59401108
commit f97f25ef5d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 21 additions and 2 deletions

View file

@ -243,7 +243,7 @@ def _handle_text(self, msg):
# existing message.
msg = deepcopy(msg)
del msg['content-transfer-encoding']
msg.set_payload(payload, charset)
msg.set_payload(msg._payload, charset)
payload = msg.get_payload()
self._munge_cte = (msg['content-transfer-encoding'],
msg['content-type'])

View file

@ -340,7 +340,7 @@ def set_payload(self, payload, charset=None):
return
if not isinstance(charset, Charset):
charset = Charset(charset)
payload = payload.encode(charset.output_charset)
payload = payload.encode(charset.output_charset, 'surrogateescape')
if hasattr(payload, 'decode'):
self._payload = payload.decode('ascii', 'surrogateescape')
else:

View file

@ -337,6 +337,21 @@ def test_nonascii_as_string_without_cte(self):
msg = email.message_from_bytes(source)
self.assertEqual(msg.as_string(), expected)
def test_nonascii_as_string_with_ascii_charset(self):
    """as_string() copes with 8-bit data in a part declared as us-ascii.

    The fixture declares ``charset="us-ascii"`` with an 8bit transfer
    encoding, yet its body contains iso-8859-1 encoded non-ASCII
    characters.  Serialising the parsed message must yield the source
    text with every undecodable byte replaced by U+FFFD, which is what
    ``bytes.decode('ascii', 'replace')`` produces for the same input.
    """
    # The empty line after the header block is the header/body separator.
    # Without it the parser stops reading headers at the first non-header
    # line and the generator adds a blank line on output, so the
    # round-trip comparison below could never succeed.
    m = textwrap.dedent("""\
        MIME-Version: 1.0
        Content-type: text/plain; charset="us-ascii"
        Content-Transfer-Encoding: 8bit

        Test if non-ascii messages with no Content-Transfer-Encoding set
        can be as_string'd:
        Föö bär
        """)
    # Encode as Latin-1 so the accented letters become single bytes >= 0x80
    # that are invalid in the declared us-ascii charset.
    source = m.encode('iso-8859-1')
    # Each non-ASCII byte is expected to surface as one U+FFFD character.
    expected = source.decode('ascii', 'replace')
    msg = email.message_from_bytes(source)
    self.assertEqual(msg.as_string(), expected)
def test_nonascii_as_string_without_content_type_and_cte(self):
m = textwrap.dedent("""\
MIME-Version: 1.0

View file

@ -0,0 +1,4 @@
Fix :exc:`UnicodeEncodeError` in :meth:`email.message.Message.as_string` that
results when a message that claims to be in the ASCII character set actually
has non-ASCII characters. Non-ASCII characters are now replaced with the U+FFFD
replacement character, like in the ``replace`` error handler.