Skip to content

Commit 0abca3e

Browse files
committed
Stop incorrectly RFC 2047 encoding non-ASCII email addresses
Email generators had been incorrectly flattening non-ASCII email addresses to RFC 2047 encoded-word format, leaving them undeliverable. (RFC 2047 prohibits use of encoded-word in an addr-spec.) This change raises a ValueError when attempting to flatten an EmailMessage with a non-ASCII addr-spec and a policy with utf8=False. (Exception: If the non-ASCII address originated from parsing a message, it will be flattened as originally parsed, without error.) Non-ASCII email addresses are supported when using a policy with utf8=True (such as email.policy.SMTPUTF8) under RFCs 6531 and 6532. Non-ASCII email address domains (but not localparts) can also be used with non-SMTPUTF8 policies by encoding the domain as an IDNA A-label. (The email package does not perform this encoding, because it cannot know whether the caller wants IDNA 2003, IDNA 2008, or some other variant such as UTS #46.)
1 parent 46f5a4f commit 0abca3e

File tree

3 files changed

+75
-4
lines changed

3 files changed

+75
-4
lines changed

Doc/library/email.policy.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -406,11 +406,17 @@ added matters. To illustrate::
406406
.. attribute:: utf8
407407

408408
If ``False``, follow :rfc:`5322`, supporting non-ASCII characters in
409-
headers by encoding them as "encoded words". If ``True``, follow
410-
:rfc:`6532` and use ``utf-8`` encoding for headers. Messages
409+
headers by encoding them as :rfc:`2047` "encoded words". If ``True``,
410+
follow :rfc:`6532` and use ``utf-8`` encoding for headers. Messages
411411
formatted in this way may be passed to SMTP servers that support
412412
the ``SMTPUTF8`` extension (:rfc:`6531`).
413413

414+
.. versionchanged:: 3.13
415+
If ``False``, the generator will raise a ``ValueError`` if any email
416+
address contains non-ASCII characters. To send to a non-ASCII domain
417+
with ``utf8=False``, encode the domain using the third-party
418+
:pypi:`idna` module or :mod:`encodings.idna`. No RFC allows a non-ASCII
419+
username ("localpart") in an email address with ``utf8=False``.
414420

415421
.. attribute:: refold_source
416422

Lib/email/_header_value_parser.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2829,6 +2829,17 @@ def _refold_parse_tree(parse_tree, *, policy):
28292829
_fold_mime_parameters(part, lines, maxlen, encoding)
28302830
continue
28312831

2832+
if want_encoding and part.token_type == 'addr-spec':
2833+
# RFC2047 forbids encoded-word in any part of an addr-spec.
2834+
if charset == 'unknown-8bit':
2835+
# Non-ASCII addr-spec came from parsed message; leave unchanged.
2836+
want_encoding = False
2837+
else:
2838+
raise ValueError(
2839+
"Non-ASCII address requires policy with utf8=True:"
2840+
" '{}'".format(part)
2841+
)
2842+
28322843
if want_encoding and not wrap_as_ew_blocked:
28332844
if not part.as_ew_allowed:
28342845
want_encoding = False

Lib/test/test_email/test_generator.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import io
2+
import re
23
import textwrap
34
import unittest
45
from email import message_from_string, message_from_bytes
@@ -288,6 +289,28 @@ def test_keep_long_encoded_newlines(self):
288289
g.flatten(msg)
289290
self.assertEqual(s.getvalue(), self.typ(expected))
290291

292+
def test_non_ascii_addr_spec_raises(self):
293+
# RFC2047 encoded-word is not permitted in any part of an addr-spec.
294+
# (See also test_non_ascii_addr_spec_preserved below.)
295+
g = self.genclass(self.ioclass(), policy=self.policy.clone(utf8=False))
296+
cases = [
297+
'wők@example.com',
298+
'wok@exàmple.com',
299+
'wők@exàmple.com',
300+
'"Name, for display" <wők@example.com>',
301+
'Näyttönimi <wők@example.com>',
302+
]
303+
for address in cases:
304+
with self.subTest(address=address):
305+
msg = EmailMessage()
306+
msg['To'] = address
307+
expected_error = re.escape(
308+
"Non-ASCII address requires policy with utf8=True:"
309+
" '{}'".format(msg['To'].addresses[0].addr_spec)
310+
)
311+
with self.assertRaisesRegex(ValueError, expected_error):
312+
g.flatten(msg)
313+
291314

292315
class TestGenerator(TestGeneratorBase, TestEmailBase):
293316

@@ -432,12 +455,12 @@ def test_cte_type_7bit_transforms_8bit_cte(self):
432455

433456
def test_smtputf8_policy(self):
434457
msg = EmailMessage()
435-
msg['From'] = "Páolo <főo@bar.com>"
458+
msg['From'] = "Páolo <főo@bàr.com>"
436459
msg['To'] = 'Dinsdale'
437460
msg['Subject'] = 'Nudge nudge, wink, wink \u1F609'
438461
msg.set_content("oh là là, know what I mean, know what I mean?")
439462
expected = textwrap.dedent("""\
440-
From: Páolo <főo@bar.com>
463+
From: Páolo <főo@bàr.com>
441464
To: Dinsdale
442465
Subject: Nudge nudge, wink, wink \u1F609
443466
Content-Type: text/plain; charset="utf-8"
@@ -472,6 +495,37 @@ def test_smtp_policy(self):
472495
g.flatten(msg)
473496
self.assertEqual(s.getvalue(), expected)
474497

498+
def test_non_ascii_addr_spec_preserved(self):
499+
# A defective non-ASCII addr-spec parsed from the original
500+
# message is left unchanged when flattening.
501+
# (See also test_non_ascii_addr_spec_raises above.)
502+
source = (
503+
'To: jörg@example.com, "But a long name still works with refold_source" <jörg@example.com>'
504+
).encode()
505+
expected = (
506+
b'To: j\xc3\xb6rg@example.com,\n'
507+
b' "But a long name still works with refold_source" <j\xc3\xb6rg@example.com>\n'
508+
b'\n'
509+
)
510+
msg = message_from_bytes(source, policy=policy.default)
511+
s = io.BytesIO()
512+
g = BytesGenerator(s, policy=policy.default)
513+
g.flatten(msg)
514+
self.assertEqual(s.getvalue(), expected)
515+
516+
def test_idna_encoding_preserved(self):
517+
# Nothing tries to decode a pre-encoded IDNA domain.
518+
msg = EmailMessage()
519+
msg["To"] = Address(
520+
username='jörg',
521+
domain='☕.example'.encode('idna').decode() # IDNA 2003
522+
)
523+
expected = 'To: jörg@xn--53h.example\n\n'.encode()
524+
s = io.BytesIO()
525+
g = BytesGenerator(s, policy=policy.default.clone(utf8=True))
526+
g.flatten(msg)
527+
self.assertEqual(s.getvalue(), expected)
528+
475529

476530
if __name__ == '__main__':
477531
unittest.main()

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy