Ticket #20099: 0001-Implement-BrokenLinkEmailsMiddleware.is_request_we_s.2.patch

File 0001-Implement-BrokenLinkEmailsMiddleware.is_request_we_s.2.patch, 6.1 KB (added by Ram Rachum, 11 years ago)
  • django/middleware/common.py

    From edad9ccfac77918c1be85d674908f99cc155f685 Mon Sep 17 00:00:00 2001
    From: Ram Rachum <ram@rachum.com>
    Date: Sun, 24 Mar 2013 18:57:51 +0200
    Subject: [PATCH] Implement
     `BrokenLinkEmailsMiddleware.is_request_we_should_notify_for`
    
    ---
     django/middleware/common.py    | 56 ++++++++++++++++++++++++++++--------------
     docs/howto/error-reporting.txt |  5 ++++
     tests/middleware/tests.py      | 21 ++++++++++++++++
     3 files changed, 64 insertions(+), 18 deletions(-)
    
    diff --git a/django/middleware/common.py b/django/middleware/common.py
    index 92f8cb3..262c819 100644
    @@ -137,31 +137,51 @@ class BrokenLinkEmailsMiddleware(object):
    137137        """
    138138        Send broken link emails for relevant 404 NOT FOUND responses.
    139139        """
    140         if response.status_code == 404 and not settings.DEBUG:
    141             domain = request.get_host()
    142             path = request.get_full_path()
    143             referer = request.META.get('HTTP_REFERER', '')
    144             is_internal = self.is_internal_request(domain, referer)
    145             is_not_search_engine = '?' not in referer
    146             is_ignorable = self.is_ignorable_404(path)
    147             if referer and (is_internal or is_not_search_engine) and not is_ignorable:
    148                 ua = request.META.get('HTTP_USER_AGENT', '<none>')
    149                 ip = request.META.get('REMOTE_ADDR', '<none>')
    150                 mail_managers(
    151                     "Broken %slink on %s" % (('INTERNAL ' if is_internal else ''), domain),
    152                     "Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" % (referer, path, ua, ip),
    153                     fail_silently=True)
     140       
     141        if response.status_code == 404 and not settings.DEBUG and \
     142                                 self.is_request_we_should_notify_for(request):
     143            is_internal = self.is_internal_request(request)
     144            mail_managers(
     145                "Broken %slink on %s" % (
     146                    ('INTERNAL ' if is_internal else ''),
     147                    request.get_host()
     148                ),
     149                "Referrer: %s\nRequested URL: %s\nUser agent: %s\n"
     150                "IP address: %s\n" % (
     151                    request.META.get('HTTP_REFERER', ''),
     152                    request.get_full_path(),
     153                    request.META.get('HTTP_USER_AGENT', ''),
     154                    request.META.get('REMOTE_ADDR', '')
     155                ),
     156                fail_silently=True)
    154157        return response
    155158
    156     def is_internal_request(self, domain, referer):
     159    def is_internal_request(self, request):
    157160        """
    158161        Returns True if the referring URL is the same domain as the current request.
    159162        """
    160163        # Different subdomains are treated as different domains.
     164        referer = request.META.get('HTTP_REFERER', '')
     165        domain = request.get_host()
    161166        return re.match("^https?://%s/" % re.escape(domain), referer)
    162167
    163     def is_ignorable_404(self, uri):
     168    def is_request_we_should_notify_for(self, request):
    164169        """
    165         Returns True if a 404 at the given URL *shouldn't* notify the site managers.
     170        Should we notify that `request` resulted in a 404 error?
     171       
     172        This depends on the URL, the referer, and may be subclassed to check
     173        other things.
    166174        """
    167         return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
     175        referer = request.META.get('HTTP_REFERER', '')
     176       
     177        if not referer:
     178            return False
     179       
     180        is_internal = self.is_internal_request(request)
     181        is_search_engine = '?' in referer
     182       
     183        if not is_internal and is_search_engine:
     184            return False
     185       
     186        return not any(pattern.search(request.get_full_path()) for pattern in
     187                       settings.IGNORABLE_404_URLS)
  • docs/howto/error-reporting.txt

    diff --git a/docs/howto/error-reporting.txt b/docs/howto/error-reporting.txt
    index 27f11f4..987a503 100644
    @@ -98,6 +98,11 @@ crawlers often request::
    9898(Note that these are regular expressions, so we put a backslash in front of
    9999periods to escape them.)
    100100
     101If you'd like to customize the behavior of
     102:class:`django.middleware.common.BrokenLinkEmailsMiddleware` further (for
     103example to ignore requests coming from web crawlers), you should subclass it
     104and override its methods.
     105
    101106.. seealso::
    102107
    103108   404 errors are logged using the logging framework. By default, these log
  • tests/middleware/tests.py

    diff --git a/tests/middleware/tests.py b/tests/middleware/tests.py
    index f2f7f4d..20dcc70 100644
    @@ -320,7 +320,28 @@ class BrokenLinkEmailsMiddlewareTest(TestCase):
    320320        BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
    321321        self.assertEqual(len(mail.outbox), 0)
    322322
     323    def test_custom_request_checker(self):
     324        class SubclassedMiddleware(BrokenLinkEmailsMiddleware):
     325            ignored_user_agent_patterns = (re.compile(r'Spider.*'),
     326                                           re.compile(r'Robot.*'))
     327            def is_request_we_should_notify_for(self, request):
     328                '''Check user-agent in addition to normal checks.'''
     329                assert isinstance(request, HttpRequest)
     330                if not super(SubclassedMiddleware, self). \
     331                                      is_request_we_should_notify_for(request):
     332                    return False
     333                user_agent = request.META['HTTP_USER_AGENT']
     334                return not any(pattern.search(user_agent) for pattern in
     335                               self.ignored_user_agent_patterns)
    323336
     337        self.req.META['HTTP_REFERER'] = '/another/url/'
     338        self.req.META['HTTP_USER_AGENT'] = 'Spider machine 3.4'
     339        SubclassedMiddleware().process_response(self.req, self.resp)
     340        self.assertEqual(len(mail.outbox), 0)
     341        self.req.META['HTTP_USER_AGENT'] = 'My user agent'
     342        SubclassedMiddleware().process_response(self.req, self.resp)
     343        self.assertEqual(len(mail.outbox), 1)
     344   
    324345class ConditionalGetMiddlewareTest(TestCase):
    325346    urls = 'middleware.cond_get_urls'
    326347    def setUp(self):
Back to Top