Ticket #5418: assert_no_broken_links_with_tests_and_doc.2.diff
File assert_no_broken_links_with_tests_and_doc.2.diff, 19.1 KB (added by , 17 years ago) |
---|
-
django/test/testcases.py
186 186 self.failIf(template_name in template_names, 187 187 (u"Template '%s' was used unexpectedly in rendering the" 188 188 u" response") % template_name) 189 190 def assertNoBrokenLinks(self, response, internal_only=True): 191 """ 192 Asserts that all the links within the response, when followed, return 193 a valid page (a 200) or a redirect (302). 194 195 Blank links are also identified (such as <a href="">) as this is helpful 196 to identify when the url tag in <a href="{% url my-url-name arg1 %}"> 197 fails. 198 199 Internal page links (such as <a href="#content">Skip to content</a> are 200 also checked to ensure they are not broken (ie. that an element with the 201 id exists on the page). 202 203 Current issues/thoughts: 204 * Should we follow 302's to verify the page redirects to a 200 result? 205 """ 206 non_broken_status_codes = (200, 301, 302, 304) 207 208 # Create the parser to grab the internal and external links 209 import HTMLParser 210 211 class AnchorParser(HTMLParser.HTMLParser): 212 external_href_re = re.compile(r'^https?://', re.IGNORECASE) 213 ignore_href_re = re.compile(r'^(mailto|ftp):', re.IGNORECASE) 214 internal_id_href_re = re.compile(r'^#(.*)') 215 216 def __init__(self): 217 self.hrefs_internal = [] 218 self.hrefs_external = [] 219 self.interal_page_link_ids = [] 220 self.element_ids = [] 221 self.reset() 222 223 def handle_starttag(self, tag, attrs): 224 if tag == "a": 225 for k, v in attrs: 226 if k == "href": 227 # For each href that we're not ignoring, save the 228 # value and position 229 if self.ignore_href_re.match(v): 230 pass 231 elif self.external_href_re.match(v): 232 self.hrefs_external.append((v, self.getpos())) 233 elif self.internal_id_href_re.match(v): 234 # If this is of the form href="#content" then 235 # remember the actual id "content". 
236 self.interal_page_link_ids.append( 237 ( 238 self.internal_id_href_re.match(v).groups()[0], 239 self.getpos() 240 ) 241 ) 242 else: 243 self.hrefs_internal.append((v, self.getpos())) 244 elif k == "id": 245 # An anchor link can have an id and be linked to 246 # via an internal page link too. 247 self.element_ids.append(v) 248 else: 249 # Go through the attributes of all the other tags so we know all 250 # the element id's within the page for internal page links. 251 for k, v in attrs: 252 if k == "id": 253 self.element_ids.append(v) 254 255 p = AnchorParser() 256 p.feed(response.content) 257 p.close() 258 259 # Check the internal links first: 260 for link, (lineno, offset) in p.hrefs_internal: 261 self.failIf( 262 ''==link, 263 (u"The page contains a link with an empty href on line %(lineno)d.") % { 264 'page': 'pagename', 265 'lineno': lineno, 266 'response': response 267 } 268 ) 269 270 link_response = response.client.get(link) 271 self.failUnless( 272 link_response.status_code in non_broken_status_codes, 273 (u"The link '%(link)s' on line %(lineno)d appears to be broken (status is %(status)s)") % { 274 'link': link, 275 'lineno': lineno, 276 'status': link_response.status_code 277 } 278 ) 279 280 # Next, check the internal page links: 281 for id, (lineno, offset) in p.interal_page_link_ids: 282 # If the id wasn't blank (ie. <a href="#"> then make sure that there 283 # was an element with the same id on the page somewhere. 284 if id: 285 self.failUnless( 286 id in p.element_ids, 287 ( 288 u"The internal link to #%(id)s on line %(lineno)d does" 289 u" not link to a corresponding element with an " 290 u"id=\"%(id)s\"." 
% { 291 'id': id, 292 'lineno': lineno 293 } 294 ) 295 ) 296 297 # Then check the external links 298 if not internal_only: 299 import urllib2 300 from django.conf import settings 301 302 headers = { 303 "Accept": "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5", 304 "Accept-Language": "en-us,en;q=0.5", 305 "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7", 306 "Connection": "close", 307 "User-Agent": settings.URL_VALIDATOR_USER_AGENT, 308 } 309 310 for link, (lineno, offset) in p.hrefs_external: 311 try: 312 req = urllib2.Request(link, None, headers) 313 u = urllib2.urlopen(req) 314 except ValueError: 315 self.fail( 316 u"The link '%(link)s' on line %(lineno)d appears to be invalid." % { 317 'link': link, 318 'lineno': lineno 319 } 320 ) 321 except: # urllib2.URLError, httplib.InvalidURL, etc. 322 self.fail(u"The link '%(link)s' on line %(lineno)d appears to be broken." % { 323 'link': link, 324 'lineno': lineno 325 } 326 ) -
tests/regressiontests/test_client_regress/views.py
1 1 from django.contrib.auth.decorators import login_required 2 2 from django.core.mail import EmailMessage, SMTPConnection 3 from django.http import HttpResponse, HttpResponseRedirect, HttpResponseServerError 3 from django.http import HttpResponse, HttpResponseRedirect, HttpResponseServerError, HttpResponseNotFound 4 4 from django.shortcuts import render_to_response 5 5 6 6 def no_template_view(request): … … 27 27 def login_protected_redirect_view(request): 28 28 "A view that redirects all requests to the GET view" 29 29 return HttpResponseRedirect('/test_client_regress/get_view/') 30 login_protected_redirect_view = login_required(login_protected_redirect_view) 31 No newline at end of file 30 login_protected_redirect_view = login_required(login_protected_redirect_view) 31 32 def no_broken_links_view(request): 33 return HttpResponse( 34 """ 35 Lots of html stuff, including a few local links: 36 <a href="/test_client_regress/get_view/">here</a> 37 <a href="/test_client_regress/file_upload">This should redirect to add the /</a> 38 <a some="attribute" href="/test_client_regress/no_template_view/"> 39 a redirect view. Doesn't matter if closing tag missing. 40 41 Some external links: 42 <a href="http://djangoproject.com/weblog/">Django blog</a> 43 44 Some anchor links should be ignored, such as: 45 <a href="mailto:me@example.com">Send me an email</a>, or 46 <a href="ftp://example.com">Download from here</a> 47 48 Including a <a lats of bad="stuff" href="http://djangoproject.com">Missing slash</a> 49 Whole bunch of other stuff before the page ends. 50 """ 51 ) 52 53 def broken_external_link_view(request): 54 return HttpResponse( 55 """ 56 Lots of html stuff, including a few local links: 57 <a href="/test_client_regress/get_view/">here</a> 58 <a href="/test_client_regress/file_upload">This should redirect to add the /</a> 59 <a some="attribute" href="/test_client_regress/no_template_view/"> 60 a redirect view. Doesn't matter if closing tag missing. 
61 62 Some external links: 63 <a class="this one's fine" href="http://djangoproject.com/weblog">Django blog</a> 64 But this one's a 65 <a href="http://djangoproject.com/badlink.html">Broken link</a> 66 Whole bunch of other stuff before the page ends. 67 """ 68 ) 69 70 def broken_internal_link_view(request): 71 return HttpResponse( 72 """ 73 Lots of html stuff, including a few local links: 74 75 <a class="test" href="/test_client_regress/broken_view/">A broken view</a> 76 77 """ 78 ) 79 80 def bad_internal_link_view(request): 81 return HttpResponse( 82 """ 83 Lots of html stuff, including a few local links: 84 85 <a class="test" href="/test_client_regress/bad_view/">A bad view</a> 86 87 """ 88 ) 89 90 def invalid_external_link_view(request): 91 return HttpResponse( 92 """ 93 Lots of html stuff, including a few local links: 94 95 <a class="test" href="http://djangoproject&.com">An invalid link</a> 96 97 """ 98 ) 99 100 def blank_link_view(request): 101 return HttpResponse( 102 """ 103 If a link uses the url template tag to create the link for the href like 104 this: 105 href="{% url my-url-name arg %}" 106 and fails, it will end up with blank href="", this would be useful to 107 catch! 108 109 <a class="test" href="">A blank link.</a> 110 111 """ 112 ) 113 114 def internal_page_link_view(request): 115 return HttpResponse( 116 """ 117 A link which is just internal to the page, href="#content" needs to 118 have a matching element with an id="content" on the page. 119 120 <a class="test" href="#content">This one should be fine</a> 121 122 <a href="#">This one should be ignored (lots of JS uses)</a> 123 124 <a href="#footer">But this one isn't valid</a> as there's no corresponding 125 element with the id="footer" 126 127 <div id="content"> 128 Here's the content 129 </div> 130 """ 131 ) 132 133 def broken_view(request): 134 return HttpResponseServerError() 135 136 def bad_view(request): 137 return HttpResponseNotFound() -
tests/regressiontests/test_client_regress/models.py
233 233 except AssertionError, e: 234 234 self.assertEqual(str(e), "The form 'form' in context 0 does not contain the non-field error 'Some error.' (actual errors: )") 235 235 236 class AssertNoBrokenLinksTests(TestCase): 237 def test_no_broken_links(self): 238 "Tests that assertion confirms internal and external non-broken links." 239 240 response = self.client.get('/test_client_regress/no_broken_links_view/') 241 self.assertEqual(response.status_code, 200) 242 243 self.assertNoBrokenLinks(response, internal_only=False) 244 245 def test_broken_external_link(self): 246 "Tests that assertion finds broken external links" 247 248 response = self.client.get('/test_client_regress/broken_external_link_view/') 249 self.assertEqual(response.status_code, 200) 250 251 # No internal broken links: 252 self.assertNoBrokenLinks(response) 253 254 # But there is an external broken link: 255 assertion_raised=False 256 try: 257 self.assertNoBrokenLinks(response, internal_only=False) 258 except AssertionError, e: 259 assertion_raised = True # Should always get here 260 self.assertEqual( 261 str(e), 262 "The link 'http://djangoproject.com/badlink.html' on line 11" 263 " appears to be broken." 264 ) 265 266 self.assertTrue(assertion_raised) 267 268 def test_invalid_external_link(self): 269 "Tests that assertion finds invalid external links" 270 271 response = self.client.get('/test_client_regress/invalid_external_link_view/') 272 self.assertEqual(response.status_code, 200) 273 274 assertion_raised=False 275 try: 276 self.assertNoBrokenLinks(response, internal_only=False) 277 except AssertionError, e: 278 assertion_raised = True # Should always get here 279 self.assertEqual( 280 str(e), 281 "The link 'http://djangoproject&.com' on line 4 appears" 282 " to be broken." 
283 ) 284 285 self.assertTrue(assertion_raised) 286 287 def test_broken_internal_link(self): 288 "Tests that assertion finds broken internal links" 289 290 response = self.client.get('/test_client_regress/broken_internal_link_view/') 291 self.assertEqual(response.status_code, 200) 292 293 assertion_raised=False 294 try: 295 self.assertNoBrokenLinks(response) 296 except AssertionError, e: 297 assertion_raised = True # Should always get here 298 self.assertEqual( 299 str(e), 300 "The link '/test_client_regress/broken_view/' on line 4" 301 " appears to be broken (status is 500)" 302 ) 303 304 self.assertTrue(assertion_raised) 305 306 def test_bad_internal_link(self): 307 "Tests that assertion finds bad internal links" 308 309 response = self.client.get('/test_client_regress/bad_internal_link_view/') 310 self.assertEqual(response.status_code, 200) 311 312 assertion_raised=False 313 try: 314 self.assertNoBrokenLinks(response) 315 except AssertionError, e: 316 assertion_raised = True # Should always get here 317 self.assertEqual( 318 str(e), 319 "The link '/test_client_regress/bad_view/' on line 4 appears to" 320 " be broken (status is 404)" 321 ) 322 323 self.assertTrue(assertion_raised) 324 325 def test_blank_link(self): 326 "Tests that links with blank hrefs are identified appropriately" 327 328 response = self.client.get('/test_client_regress/blank_link_view/') 329 self.assertEqual(response.status_code, 200) 330 331 assertion_raised=False 332 try: 333 self.assertNoBrokenLinks(response) 334 except AssertionError, e: 335 assertion_raised = True # Should always get here 336 self.assertEqual( 337 str(e), 338 "The page contains a link with an empty href on line 8." 
339 ) 340 341 self.assertTrue(assertion_raised) 342 343 def test_internal_page_link(self): 344 "Tests that internal page links are valid" 345 346 response = self.client.get('/test_client_regress/internal_page_link_view/') 347 self.assertEqual(response.status_code, 200) 348 349 assertion_raised=False 350 try: 351 self.assertNoBrokenLinks(response) 352 except AssertionError, e: 353 assertion_raised = True # Should always get here 354 self.assertEqual( 355 str(e), 356 "The internal link to #footer on line 9 does not link to a" 357 " corresponding element with an id=\"footer\"." 358 ) 359 360 self.assertTrue(assertion_raised) 361 236 362 class FileUploadTests(TestCase): 237 363 def test_simple_upload(self): 238 364 fd = open(os.path.join(os.path.dirname(__file__), "views.py")) -
tests/regressiontests/test_client_regress/urls.py
5 5 (r'^no_template_view/$', views.no_template_view), 6 6 (r'^file_upload/$', views.file_upload_view), 7 7 (r'^get_view/$', views.get_view), 8 (r'^login_protected_redirect_view/$', views.login_protected_redirect_view) 8 (r'^login_protected_redirect_view/$', views.login_protected_redirect_view), 9 # All the following urls are for the assertNoBrokenLinks feature: 10 (r'^no_broken_links_view/$', views.no_broken_links_view), 11 (r'^broken_external_link_view/$', views.broken_external_link_view), 12 (r'^broken_internal_link_view/$', views.broken_internal_link_view), 13 (r'^bad_internal_link_view/$', views.bad_internal_link_view), 14 (r'^invalid_external_link_view/$', views.invalid_external_link_view), 15 (r'^blank_link_view/$', views.blank_link_view), 16 (r'^internal_page_link_view/$', views.internal_page_link_view), 17 (r'^broken_view/$', views.broken_view), 18 (r'^bad_view/$', views.bad_view) 9 19 ) -
AUTHORS
210 210 Jason McBrayer <http://www.carcosa.net/jason/> 211 211 mccutchen@gmail.com 212 212 michael.mcewan@gmail.com 213 Michael Nelson <http://liveandletlearn.net/> 213 214 mikko@sorl.net 214 215 Slawek Mikula <slawek dot mikula at gmail dot com> 215 216 mitakummaa@gmail.com -
docs/testing.txt
846 846 847 847 The name is a string such as ``'admin/index.html'``. 848 848 849 ``assertNoBrokenLinks(response, internal_only=True)`` 850 Asserts that all the anchor links within the response are not broken (i.e. 851 requesting them results in a status of 200, 301, 302 or 304). By default only links internal to 852 the site will be checked (i.e. those not beginning with ``http://`` or ``https://``). 853 854 Internal page links such as ``<a href="#content">`` are checked to ensure 855 that they are not broken (i.e. that an element with the ``id="content"`` exists 856 on the page). 857 858 Blank links, such as ``<a href="">``, are also identified, which is helpful 859 to check when the url tag in ``<a href="{% url my-url-name arg1 %}">`` fails. 860 861 Note: As this assertion effectively clicks on all the links within the 862 response, care needs to be taken if any link has a side effect (such as 863 modifying your database). 864 849 865 E-mail services 850 866 --------------- 851 867