Commit 8dc11dc5 authored by Aymeric Augustin
Browse files

[1.9.x] Fixed #25302 (again) -- Ignored scheme when checking for bad referers.

The check introduced in 4ce433e8 was too strict in real life. The poorly
implemented bots this patch attempted to ignore are sloppy when it comes
to http vs. https.

Backport of 11f10b70 from master
parent b4a1d545
Loading
Loading
Loading
Loading
+11 −6
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@ from django.core.exceptions import PermissionDenied
from django.core.mail import mail_managers
from django.utils.cache import get_conditional_response, set_response_etag
from django.utils.encoding import force_text
from django.utils.six.moves.urllib.parse import urlparse

logger = logging.getLogger('django.request')

@@ -163,13 +164,17 @@ class BrokenLinkEmailsMiddleware(object):
        according to project settings or in three specific situations:
         - If the referer is empty.
         - If a '?' in referer is identified as a search engine source.
         - If the referer is equal to the current URL (assumed to be a
           malicious bot).
         - If the referer is equal to the current URL, ignoring the scheme
           (assumed to be a poorly implemented bot).
        """
        full_url = "%s://%s/%s" % (request.scheme, domain, uri.lstrip('/'))
        if (not referer or
                (not self.is_internal_request(domain, referer) and '?' in referer) or
                (referer == uri or referer == full_url)):
        if not referer:
            return True

        if not self.is_internal_request(domain, referer) and '?' in referer:
            return True

        parsed_referer = urlparse(referer)
        if parsed_referer.netloc in ['', domain] and parsed_referer.path == uri:
            return True

        return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
+9 −0
Original line number Diff line number Diff line
@@ -383,11 +383,20 @@ class BrokenLinkEmailsMiddlewareTest(SimpleTestCase):
        self.req.META['HTTP_REFERER'] = self.req.path
        BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
        self.assertEqual(len(mail.outbox), 0)

        # URL with scheme and domain should also be ignored
        self.req.META['HTTP_REFERER'] = 'http://testserver%s' % self.req.path
        BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
        self.assertEqual(len(mail.outbox), 0)

        # URL with a different scheme should be ignored as well because bots
        # tend to use http:// in referers even when browsing HTTPS websites.
        self.req.META['HTTP_X_PROTO'] = 'https'
        self.req.META['SERVER_PORT'] = 443
        with self.settings(SECURE_PROXY_SSL_HEADER=('HTTP_X_PROTO', 'https')):
            BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
        self.assertEqual(len(mail.outbox), 0)

    def test_referer_equal_to_requested_url_on_another_domain(self):
        self.req.META['HTTP_REFERER'] = 'http://anotherserver%s' % self.req.path
        BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)