Commit ec009ef1 authored by Sergei Maertens, committed by Tim Graham
Browse files

Fixed #25986 -- Fixed crash sending email with non-ASCII in local part of the address.

On Python 3, sending emails failed for addresses containing non-ASCII
characters due to the usage of the legacy Python email.utils.formataddr()
function. This is fixed by using the proper Address object on Python 3.
parent 086510fd
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -660,6 +660,7 @@ answer newbie questions, and generally made Django that much better:
    Sengtha Chay <sengtha@e-khmer.com>
    Senko Rašić <senko.rasic@dobarkod.hr>
    serbaut@gmail.com
    Sergei Maertens <sergeimaertens@gmail.com>
    Sergey Fedoseev <fedoseev.sergey@gmail.com>
    Sergey Kolosov <m17.admin@gmail.com>
    Seth Hill <sethrh@gmail.com>
+3 −3
Original line number Diff line number Diff line
@@ -115,9 +115,9 @@ class EmailBackend(BaseEmailBackend):
        """A helper method that does the actual sending."""
        if not email_message.recipients():
            return False
        from_email = sanitize_address(email_message.from_email, email_message.encoding)
        recipients = [sanitize_address(addr, email_message.encoding)
                      for addr in email_message.recipients()]
        encoding = email_message.encoding or settings.DEFAULT_CHARSET
        from_email = sanitize_address(email_message.from_email, encoding)
        recipients = [sanitize_address(addr, encoding) for addr in email_message.recipients()]
        message = email_message.message()
        try:
            self.connection.sendmail(from_email, recipients, message.as_bytes(linesep='\r\n'))
+53 −9
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@ from email.mime.base import MIMEBase
from email.mime.message import MIMEMessage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formataddr, formatdate, getaddresses, parseaddr
from email.utils import formatdate, getaddresses, parseaddr
from io import BytesIO

from django.conf import settings
@@ -103,23 +103,67 @@ def forbid_multi_line_headers(name, val, encoding):
    return str(name), val


def split_addr(addr, encoding):
    """
    Split the address into local part and domain, properly encoded.

    When non-ascii characters are present in the local part, it must be
    MIME-word encoded. The domain name must be idna-encoded if it contains
    non-ascii characters.

    Return a ``(localpart, domain)`` tuple of strings. If ``addr`` contains
    no '@', the whole value is MIME-word encoded as the local part and the
    domain is the empty string.
    """
    if '@' in addr:
        # Split on the *last* '@': a domain can never contain one, whereas a
        # quoted local part such as "a@b"@example.com legitimately can.
        localpart, domain = addr.rsplit('@', 1)
        # Try to get the simplest encoding - ascii if possible so that
        # to@example.com doesn't become =?utf-8?q?to?=@example.com. This
        # makes unit testing a bit easier and more readable.
        try:
            localpart.encode('ascii')
        except UnicodeEncodeError:
            localpart = Header(localpart, encoding).encode()
        domain = domain.encode('idna').decode('ascii')
    else:
        localpart = Header(addr, encoding).encode()
        domain = ''
    return (localpart, domain)


def sanitize_address(addr, encoding):
    """
    Format a pair of (name, address) or an email address string.

    ``addr`` is either a ``(name, address)`` tuple or a single value, which
    is coerced to text and parsed with ``parseaddr()``. The display name is
    passed through ``Header(...).encode()``; an address that is not pure
    ASCII is split and encoded by ``split_addr()``. Return the formatted
    address as a string.
    """
    if not isinstance(addr, tuple):
        addr = parseaddr(force_text(addr))
    nm, addr = addr
    localpart, domain = None, None
    nm = Header(nm, encoding).encode()
    try:
        addr.encode('ascii')
    except UnicodeEncodeError:  # IDN or non-ascii in the local part
        localpart, domain = split_addr(addr, encoding)

    if six.PY2:
        # On Python 2, use the stdlib since `email.headerregistry` doesn't exist.
        from email.utils import formataddr
        if localpart and domain:
            addr = '@'.join([localpart, domain])
        else:
            addr = Header(addr, encoding).encode()
        return formataddr((nm, addr))

    # On Python 3, an `email.headerregistry.Address` object is used since
    # email.utils.formataddr() naively encodes the name as ascii (see #25986).
    from email.headerregistry import Address
    from email.errors import InvalidHeaderDefect, NonASCIILocalPartDefect

    # Both parts were already encoded above; build the address from them.
    if localpart and domain:
        address = Address(nm, username=localpart, domain=domain)
        return str(address)

    try:
        address = Address(nm, addr_spec=addr)
    except (InvalidHeaderDefect, NonASCIILocalPartDefect):
        # The raw addr-spec was rejected; fall back to explicitly encoded
        # local part and domain.
        localpart, domain = split_addr(addr, encoding)
        address = Address(nm, username=localpart, domain=domain)
    return str(address)


class MIMEMixin():
    def as_string(self, unixfrom=False, linesep='\n'):
+47 −1
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@ import smtpd
import sys
import tempfile
import threading
from email.header import Header
from email.mime.text import MIMEText
from smtplib import SMTP, SMTPException
from ssl import SSLError
@@ -19,7 +20,7 @@ from django.core.mail import (
    send_mail, send_mass_mail,
)
from django.core.mail.backends import console, dummy, filebased, locmem, smtp
from django.core.mail.message import BadHeaderError
from django.core.mail.message import BadHeaderError, sanitize_address
from django.test import SimpleTestCase, override_settings
from django.utils._os import upath
from django.utils.encoding import force_bytes, force_text
@@ -567,6 +568,42 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
        # Verify that the child message header is not base64 encoded
        self.assertIn(str('Child Subject'), parent_s)

    def test_sanitize_address(self):
        """
        sanitize_address() produces the expected output for plain, named and
        non-ASCII addresses.
        """
        named = ('A name', 'to@example.com')
        # The expected utf-8 rendering of the display name differs between
        # Python 3 (MIME-word encoded) and Python 2 (left as-is).
        if PY3:
            named_utf8 = '=?utf-8?q?A_name?= <to@example.com>'
        else:
            named_utf8 = 'A name <to@example.com>'

        cases = (
            # Simple ASCII address - string form
            (('to@example.com', 'ascii'), 'to@example.com'),
            (('to@example.com', 'utf-8'), 'to@example.com'),
            # Bytestrings are transformed to normal strings.
            ((b'to@example.com', 'utf-8'), 'to@example.com'),
            # Simple ASCII address - tuple form
            ((named, 'ascii'), 'A name <to@example.com>'),
            ((named, 'utf-8'), named_utf8),
            # Unicode characters are supported in RFC-6532.
            (('tó@example.com', 'utf-8'), '=?utf-8?b?dMOz?=@example.com'),
            (
                (('Tó Example', 'tó@example.com'), 'utf-8'),
                '=?utf-8?q?T=C3=B3_Example?= <=?utf-8?b?dMOz?=@example.com>',
            ),
        )
        for (address, charset), expected in cases:
            self.assertEqual(sanitize_address(address, charset), expected)


class PythonGlobalState(SimpleTestCase):
    """
@@ -1026,6 +1063,15 @@ class FakeSMTPServer(smtpd.SMTPServer, threading.Thread):
            data = data.encode('utf-8')
        m = message_from_bytes(data)
        maddr = parseaddr(m.get('from'))[1]

        if mailfrom != maddr:
            # According to the spec, mailfrom does not necessarily match the
            # From header - on Python 3 this is the case where the local part
            # isn't encoded, so try to correct that.
            lp, domain = mailfrom.split('@', 1)
            lp = Header(lp, 'utf-8').encode()
            mailfrom = '@'.join([lp, domain])

        if mailfrom != maddr:
            return "553 '%s' != '%s'" % (mailfrom, maddr)
        with self.sink_lock: