Ticket #5418: assert_no_broken_links_with_tests_and_doc.diff

File assert_no_broken_links_with_tests_and_doc.diff, 12.7 KB (added by absoludity, 8 years ago)

New assertNoBrokenLinks (using HTMLParser) with regression tests and docs.

  • django/test/testcases.py

     
    190190        self.failIf(template_name in template_names,
    191191            (u"Template '%s' was used unexpectedly in rendering the"
    192192             u" response") % template_name)
     193
     194    def assertNoBrokenLinks(self, response, internal_only=True):
     195        """
     196        Asserts that all the links within the response, when followed, return
     197        a valid page (a 200), a redirect (301 or 302) or a 304 (not modified).
     198       
     199        Current issues/thoughts:
     200          * Should we follow 302's to verify the page redirects to a 200 result?
     201        """
     202        non_broken_status_codes = (200, 301, 302, 304)
     203
     204        # Create the parser to grab the internal and external links
     205        import HTMLParser
     206
     207        class AnchorParser(HTMLParser.HTMLParser):
     208            external_href_re = re.compile(r'^https?://', re.IGNORECASE)
     209            ignore_href_re = re.compile(r'^(mailto|ftp):', re.IGNORECASE)
     210           
     211            def __init__(self):
     212                self.hrefs_internal = []
     213                self.hrefs_external = []
     214                self.reset()
     215       
     216            def handle_starttag(self, tag, attrs):
     217                if tag == "a":                       
     218                    for k, v in attrs:
     219                        if k == "href":
     220                            if self.ignore_href_re.match(v):
     221                                break
     222                           
     223                            if self.external_href_re.match(v):
     224                                self.hrefs_external.append(v)
     225                            else:
     226                                self.hrefs_internal.append(v)
     227                            break
     228
     229        p = AnchorParser()
     230        p.feed(response.content)
     231
     232        # Check the internal links first:
     233        for link in p.hrefs_internal:
     234            link_response = response.client.get(link)
     235            self.failUnless(
     236                link_response.status_code in non_broken_status_codes,
     237                (u"The link '%(link)s' appears to be broken (status is %(status)s)") % {
     238                    'link': link,
     239                    'status': link_response.status_code                                                       
     240                }           
     241            )
     242
     243        # Then check the external links
     244        if not internal_only:
     245            import urllib2
     246            from django.conf import settings
     247
     248            headers = {
     249                "Accept": "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
     250                "Accept-Language": "en-us,en;q=0.5",
     251                "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
     252                "Connection": "close",
     253                "User-Agent": settings.URL_VALIDATOR_USER_AGENT,
     254            }
     255
     256            for link in p.hrefs_external:
     257                try:
     258                    req = urllib2.Request(link, None, headers)
     259                    u = urllib2.urlopen(req)
     260                except ValueError:
     261                    self.fail(u"The URL '%s' appears to be invalid." % link)
     262                except: # urllib2.URLError, httplib.InvalidURL, etc.
     263                    self.fail(u"The URL '%s' appears to be a broken link." % link)
  • tests/regressiontests/test_client_regress/views.py

     
    11from django.contrib.auth.decorators import login_required
    22from django.core.mail import EmailMessage, SMTPConnection
    3 from django.http import HttpResponse, HttpResponseRedirect, HttpResponseServerError
     3from django.http import HttpResponse, HttpResponseRedirect, HttpResponseServerError, HttpResponseNotFound
    44from django.shortcuts import render_to_response
    55
    66def no_template_view(request):
     
    2727def login_protected_redirect_view(request):
    2828    "A view that redirects all requests to the GET view"
    2929    return HttpResponseRedirect('/test_client_regress/get_view/')
    30 login_protected_redirect_view = login_required(login_protected_redirect_view)
    31  No newline at end of file
     30login_protected_redirect_view = login_required(login_protected_redirect_view)
     31
     32def no_broken_links_view(request):
     33    return HttpResponse(
     34    """
     35    Lots of html stuff, including a few local links:
     36      <a href="/test_client_regress/get_view/">here</a>
     37      <a href="/test_client_regress/file_upload">This should redirect to add the /</a>
     38      <a some="attribute" href="/test_client_regress/no_template_view/">
     39        a redirect view. Doesn't matter if closing tag missing.
     40       
     41    Some external links:
     42      <a href="http://djangoproject.com/weblog/">Django blog</a>
     43
     44    Some anchor links should be ignored, such as:
     45      <a href="mailto:me@example.com">Send me an email</a>, or
     46      <a href="ftp://example.com">Download from here</a>
     47   
     48    Including a <a lats of bad="stuff" href="http://djangoproject.com">Missing slash</a>
     49    Whole bunch of other stuff before the page ends.
     50    """                                     
     51    )
     52   
     53def broken_external_link_view(request):
     54    return HttpResponse(
     55    """
     56    Lots of html stuff, including a few local links:
     57      <a href="/test_client_regress/get_view/">here</a>
     58      <a href="/test_client_regress/file_upload">This should redirect to add the /</a>
     59      <a some="attribute" href="/test_client_regress/no_template_view/">
     60        a redirect view. Doesn't matter if closing tag missing.
     61       
     62    Some external links:
     63      <a class="this one's fine" href="http://djangoproject.com/weblog">Django blog</a>
     64    But this one's a
     65      <a href="http://djangoproject.com/badlink.html">Broken link</a>
     66    Whole bunch of other stuff before the page ends.
     67    """                                     
     68    )
     69   
     70def broken_internal_link_view(request):
     71    return HttpResponse(
     72    """
     73    Lots of html stuff, including a few local links:
     74
     75      <a class="test" href="/test_client_regress/broken_view/">A broken view</a>
     76       
     77    """                                     
     78    )
     79
     80def bad_internal_link_view(request):
     81    return HttpResponse(
     82    """
     83    Lots of html stuff, including a few local links:
     84
     85      <a class="test" href="/test_client_regress/bad_view/">A bad view</a>
     86   
     87    """                                     
     88    )
     89   
     90def invalid_external_link_view(request):
     91    return HttpResponse(
     92    """
     93    Lots of html stuff, including a few local links:
     94
     95      <a class="test" href="http://djangoproject&.com">An invalid link</a>
     96       
     97    """                                     
     98    )
     99
     100def broken_view(request):
     101    return HttpResponseServerError()
     102
     103def bad_view(request):
     104    return HttpResponseNotFound()
     105
     106
     107
     108   
     109 No newline at end of file
  • tests/regressiontests/test_client_regress/models.py

     
    233233        except AssertionError, e:
    234234            self.assertEqual(str(e), "The form 'form' in context 0 does not contain the non-field error 'Some error.' (actual errors: )")       
    235235
     236class AssertNoBrokenLinksTests(TestCase):
     237    def test_no_broken_links(self):
     238        "Tests that assertion confirms internal and external non-broken links."
     239       
     240        response = self.client.get('/test_client_regress/no_broken_links_view/')
     241        self.assertEqual(response.status_code, 200)
     242               
     243        self.assertNoBrokenLinks(response, internal_only=False)
     244       
     245    def test_broken_external_link(self):
     246        "Tests that assertion finds broken external links"
     247
     248        response = self.client.get('/test_client_regress/broken_external_link_view/')
     249        self.assertEqual(response.status_code, 200)
     250       
     251        # No internal broken links:
     252        self.assertNoBrokenLinks(response)
     253
     254        # But there is an external broken link:
     255        assertion_raised=True # Just to make sure we can check that the error was raised.
     256        try:
     257            self.assertNoBrokenLinks(response, internal_only=False)
     258            assertion_raised = False # Should not get here
     259        except AssertionError, e:
     260            self.assertEqual(str(e), "The URL 'http://djangoproject.com/badlink.html' appears to be a broken link.")
     261           
     262        self.assertTrue(assertion_raised)
     263
     264    def test_invalid_external_link(self):
     265        "Tests that assertion finds invalid external links"
     266
     267        response = self.client.get('/test_client_regress/invalid_external_link_view/')
     268        self.assertEqual(response.status_code, 200)
     269       
     270        assertion_raised=True
     271        try:
     272            self.assertNoBrokenLinks(response, internal_only=False)
     273            assertion_raised = False # Should not get here
     274        except AssertionError, e:
     275            self.assertEqual(str(e), "The URL 'http://djangoproject&.com' appears to be a broken link.")
     276           
     277        self.assertTrue(assertion_raised)
     278       
     279    def test_broken_internal_link(self):
     280        "Tests that assertion finds broken internal links"
     281
     282        response = self.client.get('/test_client_regress/broken_internal_link_view/')
     283        self.assertEqual(response.status_code, 200)
     284       
     285        assertion_raised=True
     286        try:
     287            self.assertNoBrokenLinks(response)
     288            assertion_raised = False # Should not get here
     289        except AssertionError, e:
     290            self.assertEqual(str(e), "The link '/test_client_regress/broken_view/' appears to be broken (status is 500)")
     291           
     292        self.assertTrue(assertion_raised)
     293       
     294    def test_bad_internal_link(self):
     295        "Tests that assertion finds bad internal links"
     296
     297        response = self.client.get('/test_client_regress/bad_internal_link_view/')
     298        self.assertEqual(response.status_code, 200)
     299       
     300        assertion_raised=True
     301        try:
     302            self.assertNoBrokenLinks(response)
     303            assertion_raised = False # Should not get here
     304        except AssertionError, e:
     305            self.assertEqual(str(e), "The link '/test_client_regress/bad_view/' appears to be broken (status is 404)")
     306           
     307        self.assertTrue(assertion_raised)
     308
     309
    236310class FileUploadTests(TestCase):
    237311    def test_simple_upload(self):
    238312        fd = open(os.path.join(os.path.dirname(__file__), "views.py"))
  • tests/regressiontests/test_client_regress/urls.py

     
    55    (r'^no_template_view/$', views.no_template_view),
    66    (r'^file_upload/$', views.file_upload_view),
    77    (r'^get_view/$', views.get_view),
    8     (r'^login_protected_redirect_view/$', views.login_protected_redirect_view)
     8    (r'^login_protected_redirect_view/$', views.login_protected_redirect_view),
     9    (r'^no_broken_links_view/$', views.no_broken_links_view),
     10    (r'^broken_external_link_view/$', views.broken_external_link_view),
     11    (r'^broken_internal_link_view/$', views.broken_internal_link_view),
     12    (r'^bad_internal_link_view/$', views.bad_internal_link_view),
     13    (r'^invalid_external_link_view/$', views.invalid_external_link_view),
     14    (r'^broken_view/$', views.broken_view),
     15    (r'^bad_view/$', views.bad_view)
    916)
  • AUTHORS

     
    199199    Jason McBrayer <http://www.carcosa.net/jason/>
    200200    mccutchen@gmail.com
    201201    michael.mcewan@gmail.com
     202    Michael Nelson <http://liveandletlearn.net/>
    202203    mikko@sorl.net
    203204    Slawek Mikula <slawek dot mikula at gmail dot com>
    204205    mitakummaa@gmail.com
  • docs/testing.txt

     
    842842
    843843    The name is a string such as ``'admin/index.html'``.
    844844
     845``assertNoBrokenLinks(response, internal_only=True)``
     846    Asserts that all the anchor links within the response are not broken (i.e.
     847    result in a status of 200 or a redirect). By default only internal links will
     848    be checked (i.e. those not beginning with http:// or https://). Note: As this
     849    assertion effectively clicks on all the links within the response, care needs
     850    to be taken if any link has a side effect (such as modifying your database).
     851
    845852E-mail services
    846853---------------
    847854
Back to Top