Index: django/test/testcases.py
===================================================================
--- django/test/testcases.py	(revision 6122)
+++ django/test/testcases.py	(working copy)
@@ -11,6 +11,49 @@
 
 normalize_long_ints = lambda s: re.sub(r'(?<![\w])(\d+)L(?![\w])', '\\1', s)
 
+# anchor_external_re captures the href value of every external anchor link
+# (ie. where the value begins with http:// or https://); the host must be a
+# dotted domain name, localhost or 127.0.0.1. Matching is case-insensitive.
+anchor_external_re = re.compile(
+    r'<a'
+    r'.+?' # anything after the opening tag up to the href
+    r'href=(?:\'|")' # The opening of the href attribute
+    r'(' # group the value of the href attribute
+        r'https?://' # http:// or https://
+        r'(?:'
+            r'(?:[A-Z0-9-]+\.)+[A-Z]{2,6}' # domain
+            r'|localhost' # or localhost
+            r'|127\.0\.0\.1' # or loopback (dots escaped to match literally)
+        r')'
+        r'(?::\d+)?' # optional port
+        r'(?:/?|/\S+?)' # a /, nothing, or /path (lazy: stop at first quote)
+    r')' # finish matching at the end of the href value.
+    r'(?:\'|")', # The closing quote of the href attribute
+    re.IGNORECASE
+)
+
+# anchor_internal_re captures the href value of every internal anchor link
+# (ie. where the value does not begin with http:// or https://). It also
+# matches page-internal fragments like #header. Matching is case-insensitive.
+anchor_internal_re = re.compile(
+    r'<a'
+    r'.+?' # anything after the opening tag up to the href
+    r'href=(?:\'|")' # The opening of the href attribute
+    r'(' # match the value of the href attribute
+    r'(?!https?://)' # make sure link doesn't start with http:// or https://
+    r'(?:\S+?)' # Match non-whitespace lazily, stopping at the first quote
+    r')' # finish matching at the end of the href value.
+    r'(?:\'|")', # The closing quote of the href attribute
+    re.IGNORECASE
+)
+
+try:
+    from django.conf import settings
+    URL_VALIDATOR_USER_AGENT = settings.URL_VALIDATOR_USER_AGENT
+except (ImportError, EnvironmentError):
+    # Fall back to a generic user agent if the import or settings lookup fails.
+    URL_VALIDATOR_USER_AGENT = 'Django (http://www.djangoproject.com/)'
+
 def to_list(value):
     """
     Puts value into a list if it's not already one.
@@ -190,3 +233,58 @@
         self.failIf(template_name in template_names,
             (u"Template '%s' was used unexpectedly in rendering the"
              u" response") % template_name)
+
+    def assertNoBrokenLinks(self, response, internal_only=True):
+        """
+        Asserts that every anchor link found in the response content can be
+        followed: internal links must return one of 200, 301, 302 or 304 via
+        the test client; external links (only checked when internal_only is
+        False) must open with urllib2 without raising an error.
+
+        Current issues/thoughts:
+          * How to handle absolute paths to http://localhost:8000 for eg.
+          * Page-internal links like #header are skipped, but other
+            non-http schemes (mailto:, javascript:) are still requested.
+          * Should we follow 302's to verify the page redirects to a 200 result?
+          * Do we need to include other protocols for external links
+            (currently anchor_external_re assumes href value begins with
+            http:// or https://, but what about ftp:// etc.?)
+        """
+        non_broken_status_codes = (200, 301, 302, 304)
+
+        # Check the internal links first, using the test client attached to
+        # the response.
+        for link in anchor_internal_re.findall(response.content):
+            if link.startswith('#'):
+                # A bare fragment points into this page; nothing to fetch.
+                continue
+            link_response = response.client.get(link)
+            self.failUnless(
+                link_response.status_code in non_broken_status_codes,
+                (u"Link '%(link)s' returned a status of %(status)s") % {
+                    'link': link,
+                    'status': link_response.status_code,
+                }
+            )
+
+        # Then check the external links
+        if not internal_only:
+            import urllib2
+
+            headers = {
+                "Accept": "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
+                "Accept-Language": "en-us,en;q=0.5",
+                "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
+                "Connection": "close",
+                "User-Agent": URL_VALIDATOR_USER_AGENT,
+            }
+
+            for link in anchor_external_re.findall(response.content):
+                try:
+                    req = urllib2.Request(link, None, headers)
+                    u = urllib2.urlopen(req)
+                except ValueError:
+                    self.fail(u'The URL %s appears to be invalid.' % link)
+                except Exception: # urllib2.URLError, httplib.InvalidURL, etc.
+                    self.fail(u'This URL %s appears to be a broken link.' % link)
+                u.close() # only reached on success; fail() always raises
\ No newline at end of file