Loading django/middleware/common.py +11 −6 Original line number Diff line number Diff line Loading @@ -8,6 +8,7 @@ from django.core.exceptions import PermissionDenied from django.core.mail import mail_managers from django.utils.cache import get_conditional_response, set_response_etag from django.utils.encoding import force_text from django.utils.six.moves.urllib.parse import urlparse logger = logging.getLogger('django.request') Loading Loading @@ -163,13 +164,17 @@ class BrokenLinkEmailsMiddleware(object): according to project settings or in three specific situations: - If the referer is empty. - If a '?' in referer is identified as a search engine source. - If the referer is equal to the current URL (assumed to be a malicious bot). - If the referer is equal to the current URL, ignoring the scheme (assumed to be a poorly implemented bot). """ full_url = "%s://%s/%s" % (request.scheme, domain, uri.lstrip('/')) if (not referer or (not self.is_internal_request(domain, referer) and '?' in referer) or (referer == uri or referer == full_url)): if not referer: return True if not self.is_internal_request(domain, referer) and '?' in referer: return True parsed_referer = urlparse(referer) if parsed_referer.netloc in ['', domain] and parsed_referer.path == uri: return True return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS) tests/middleware/tests.py +9 −0 Original line number Diff line number Diff line Loading @@ -383,11 +383,20 @@ class BrokenLinkEmailsMiddlewareTest(SimpleTestCase): self.req.META['HTTP_REFERER'] = self.req.path BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 0) # URL with scheme and domain should also be ignored self.req.META['HTTP_REFERER'] = 'http://testserver%s' % self.req.path BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 0) # URL with a different scheme should be ignored as well because bots # tend to use http:// in referers even when browsing HTTPS websites. self.req.META['HTTP_X_PROTO'] = 'https' self.req.META['SERVER_PORT'] = 443 with self.settings(SECURE_PROXY_SSL_HEADER=('HTTP_X_PROTO', 'https')): BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 0) def test_referer_equal_to_requested_url_on_another_domain(self): self.req.META['HTTP_REFERER'] = 'http://anotherserver%s' % self.req.path BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) Loading Loading
django/middleware/common.py +11 −6 Original line number Diff line number Diff line Loading @@ -8,6 +8,7 @@ from django.core.exceptions import PermissionDenied from django.core.mail import mail_managers from django.utils.cache import get_conditional_response, set_response_etag from django.utils.encoding import force_text from django.utils.six.moves.urllib.parse import urlparse logger = logging.getLogger('django.request') Loading Loading @@ -163,13 +164,17 @@ class BrokenLinkEmailsMiddleware(object): according to project settings or in three specific situations: - If the referer is empty. - If a '?' in referer is identified as a search engine source. - If the referer is equal to the current URL (assumed to be a malicious bot). - If the referer is equal to the current URL, ignoring the scheme (assumed to be a poorly implemented bot). """ full_url = "%s://%s/%s" % (request.scheme, domain, uri.lstrip('/')) if (not referer or (not self.is_internal_request(domain, referer) and '?' in referer) or (referer == uri or referer == full_url)): if not referer: return True if not self.is_internal_request(domain, referer) and '?' in referer: return True parsed_referer = urlparse(referer) if parsed_referer.netloc in ['', domain] and parsed_referer.path == uri: return True return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
tests/middleware/tests.py +9 −0 Original line number Diff line number Diff line Loading @@ -383,11 +383,20 @@ class BrokenLinkEmailsMiddlewareTest(SimpleTestCase): self.req.META['HTTP_REFERER'] = self.req.path BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 0) # URL with scheme and domain should also be ignored self.req.META['HTTP_REFERER'] = 'http://testserver%s' % self.req.path BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 0) # URL with a different scheme should be ignored as well because bots # tend to use http:// in referers even when browsing HTTPS websites. self.req.META['HTTP_X_PROTO'] = 'https' self.req.META['SERVER_PORT'] = 443 with self.settings(SECURE_PROXY_SSL_HEADER=('HTTP_X_PROTO', 'https')): BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 0) def test_referer_equal_to_requested_url_on_another_domain(self): self.req.META['HTTP_REFERER'] = 'http://anotherserver%s' % self.req.path BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) Loading