Loading AUTHORS +1 −0 Original line number Diff line number Diff line Loading @@ -472,6 +472,7 @@ answer newbie questions, and generally made Django that much better: Jyrki Pulliainen <jyrki.pulliainen@gmail.com> Thejaswi Puthraya <thejaswi.puthraya@gmail.com> Johann Queuniet <johann.queuniet@adh.naellia.eu> Ram Rachum <ram@rachum.com> Jan Rademaker Michael Radziej <mir@noris.de> Laurent Rahuel <laurent.rahuel@gmail.com> Loading django/middleware/common.py +15 −9 Original line number Diff line number Diff line Loading @@ -142,15 +142,17 @@ class BrokenLinkEmailsMiddleware(object): domain = request.get_host() path = request.get_full_path() referer = force_text(request.META.get('HTTP_REFERER', ''), errors='replace') is_internal = self.is_internal_request(domain, referer) is_not_search_engine = '?' not in referer is_ignorable = self.is_ignorable_404(path) if referer and (is_internal or is_not_search_engine) and not is_ignorable: if not self.is_ignorable_request(request, path, domain, referer): ua = request.META.get('HTTP_USER_AGENT', '<none>') ip = request.META.get('REMOTE_ADDR', '<none>') mail_managers( "Broken %slink on %s" % (('INTERNAL ' if is_internal else ''), domain), "Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" % (referer, path, ua, ip), "Broken %slink on %s" % ( ('INTERNAL ' if self.is_internal_request(domain, referer) else ''), domain ), "Referrer: %s\nRequested URL: %s\nUser agent: %s\n" "IP address: %s\n" % (referer, path, ua, ip), fail_silently=True) return response Loading @@ -159,10 +161,14 @@ class BrokenLinkEmailsMiddleware(object): Returns True if the referring URL is the same domain as the current request. """ # Different subdomains are treated as different domains. return re.match("^https?://%s/" % re.escape(domain), referer) return bool(re.match("^https?://%s/" % re.escape(domain), referer)) def is_ignorable_404(self, uri): def is_ignorable_request(self, request, uri, domain, referer): """ Returns True if a 404 at the given URL *shouldn't* notify the site managers. Returns True if the given request *shouldn't* notify the site managers. """ # '?' in referer is identified as search engine source if (not referer or (not self.is_internal_request(domain, referer) and '?' in referer)): return True return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS) docs/howto/error-reporting.txt +5 −0 Original line number Diff line number Diff line Loading @@ -98,6 +98,11 @@ crawlers often request:: (Note that these are regular expressions, so we put a backslash in front of periods to escape them.) If you'd like to customize the behavior of :class:`django.middleware.common.BrokenLinkEmailsMiddleware` further (for example to ignore requests coming from web crawlers), you should subclass it and override its methods. .. seealso:: 404 errors are logged using the logging framework. By default, these log Loading tests/middleware/tests.py +19 −0 Original line number Diff line number Diff line Loading @@ -326,6 +326,25 @@ class BrokenLinkEmailsMiddlewareTest(TestCase): BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 1) def test_custom_request_checker(self): class SubclassedMiddleware(BrokenLinkEmailsMiddleware): ignored_user_agent_patterns = (re.compile(r'Spider.*'), re.compile(r'Robot.*')) def is_ignorable_request(self, request, uri, domain, referer): '''Check user-agent in addition to normal checks.''' if super(SubclassedMiddleware, self).is_ignorable_request(request, uri, domain, referer): return True user_agent = request.META['HTTP_USER_AGENT'] return any(pattern.search(user_agent) for pattern in self.ignored_user_agent_patterns) self.req.META['HTTP_REFERER'] = '/another/url/' self.req.META['HTTP_USER_AGENT'] = 'Spider machine 3.4' SubclassedMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 0) self.req.META['HTTP_USER_AGENT'] = 'My user agent' SubclassedMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 1) class ConditionalGetMiddlewareTest(TestCase): urls = 'middleware.cond_get_urls' Loading Loading
AUTHORS +1 −0 Original line number Diff line number Diff line Loading @@ -472,6 +472,7 @@ answer newbie questions, and generally made Django that much better: Jyrki Pulliainen <jyrki.pulliainen@gmail.com> Thejaswi Puthraya <thejaswi.puthraya@gmail.com> Johann Queuniet <johann.queuniet@adh.naellia.eu> Ram Rachum <ram@rachum.com> Jan Rademaker Michael Radziej <mir@noris.de> Laurent Rahuel <laurent.rahuel@gmail.com> Loading
django/middleware/common.py +15 −9 Original line number Diff line number Diff line Loading @@ -142,15 +142,17 @@ class BrokenLinkEmailsMiddleware(object): domain = request.get_host() path = request.get_full_path() referer = force_text(request.META.get('HTTP_REFERER', ''), errors='replace') is_internal = self.is_internal_request(domain, referer) is_not_search_engine = '?' not in referer is_ignorable = self.is_ignorable_404(path) if referer and (is_internal or is_not_search_engine) and not is_ignorable: if not self.is_ignorable_request(request, path, domain, referer): ua = request.META.get('HTTP_USER_AGENT', '<none>') ip = request.META.get('REMOTE_ADDR', '<none>') mail_managers( "Broken %slink on %s" % (('INTERNAL ' if is_internal else ''), domain), "Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" % (referer, path, ua, ip), "Broken %slink on %s" % ( ('INTERNAL ' if self.is_internal_request(domain, referer) else ''), domain ), "Referrer: %s\nRequested URL: %s\nUser agent: %s\n" "IP address: %s\n" % (referer, path, ua, ip), fail_silently=True) return response Loading @@ -159,10 +161,14 @@ class BrokenLinkEmailsMiddleware(object): Returns True if the referring URL is the same domain as the current request. """ # Different subdomains are treated as different domains. return re.match("^https?://%s/" % re.escape(domain), referer) return bool(re.match("^https?://%s/" % re.escape(domain), referer)) def is_ignorable_404(self, uri): def is_ignorable_request(self, request, uri, domain, referer): """ Returns True if a 404 at the given URL *shouldn't* notify the site managers. Returns True if the given request *shouldn't* notify the site managers. """ # '?' in referer is identified as search engine source if (not referer or (not self.is_internal_request(domain, referer) and '?' in referer)): return True return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
docs/howto/error-reporting.txt +5 −0 Original line number Diff line number Diff line Loading @@ -98,6 +98,11 @@ crawlers often request:: (Note that these are regular expressions, so we put a backslash in front of periods to escape them.) If you'd like to customize the behavior of :class:`django.middleware.common.BrokenLinkEmailsMiddleware` further (for example to ignore requests coming from web crawlers), you should subclass it and override its methods. .. seealso:: 404 errors are logged using the logging framework. By default, these log Loading
tests/middleware/tests.py +19 −0 Original line number Diff line number Diff line Loading @@ -326,6 +326,25 @@ class BrokenLinkEmailsMiddlewareTest(TestCase): BrokenLinkEmailsMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 1) def test_custom_request_checker(self): class SubclassedMiddleware(BrokenLinkEmailsMiddleware): ignored_user_agent_patterns = (re.compile(r'Spider.*'), re.compile(r'Robot.*')) def is_ignorable_request(self, request, uri, domain, referer): '''Check user-agent in addition to normal checks.''' if super(SubclassedMiddleware, self).is_ignorable_request(request, uri, domain, referer): return True user_agent = request.META['HTTP_USER_AGENT'] return any(pattern.search(user_agent) for pattern in self.ignored_user_agent_patterns) self.req.META['HTTP_REFERER'] = '/another/url/' self.req.META['HTTP_USER_AGENT'] = 'Spider machine 3.4' SubclassedMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 0) self.req.META['HTTP_USER_AGENT'] = 'My user agent' SubclassedMiddleware().process_response(self.req, self.resp) self.assertEqual(len(mail.outbox), 1) class ConditionalGetMiddlewareTest(TestCase): urls = 'middleware.cond_get_urls' Loading