Ticket #16921: 16921-assertHTMLEqual.1.diff

File 16921-assertHTMLEqual.1.diff, 32.8 KB (added by Gregor Müllegger, 9 years ago)
  • new file django/test/html.py

    diff --git a/django/test/html.py b/django/test/html.py
    new file mode 100644
    index 0000000..30ad07a
    - +  
     1'''
     2Comparing two html documents.
     3'''
     4import re
     5from HTMLParser import HTMLParseError
     6from django.utils.htmlparser import HTMLParser
     7
     8
     9WHITESPACE = re.compile('\s+')
     10
     11
     12def normalize_whitespace(string):
     13    return WHITESPACE.sub(' ', string)
     14
     15
     16class Element(object):
     17    def __init__(self, name, attributes):
     18        self.name = name
     19        self.attributes = sorted(attributes)
     20        self.children = []
     21
     22    def append(self, element):
     23        if isinstance(element, basestring):
     24            element = normalize_whitespace(element)
     25            if self.children:
     26                if isinstance(self.children[-1], basestring):
     27                    self.children[-1] += element
     28                    self.children[-1] = normalize_whitespace(self.children[-1])
     29                    return
     30        elif self.children:
     31            # removing the last child if it is only whitespace
     32            # this can result in incorrect dom representations since
     33            # whitespace between inline tags like <span> is significant
     34            if isinstance(self.children[-1], basestring):
     35                if self.children[-1].isspace():
     36                    self.children.pop()
     37        if element:
     38            self.children.append(element)
     39
     40    def finalize(self):
     41        def rstrip_last_element(children):
     42            if children:
     43                if isinstance(children[-1], basestring):
     44                    children[-1] = children[-1].rstrip()
     45                    if not children[-1]:
     46                        children.pop()
     47                        children = rstrip_last_element(children)
     48            return children
     49
     50        rstrip_last_element(self.children)
     51        for i, child in enumerate(self.children):
     52            if isinstance(child, basestring):
     53                self.children[i] = child.strip()
     54            elif hasattr(child, 'finalize'):
     55                child.finalize()
     56
     57    def __eq__(self, element):
     58        if self.name != element.name:
     59            return False
     60        if len(self.attributes) != len(element.attributes):
     61            return False
     62        if self.attributes != element.attributes:
     63            # an attribute without a value is the same as an attribute whose
     64            # value equals the attribute's name:
     65            # <input checked> == <input checked="checked">
     66            for i in range(len(self.attributes)):
     67                attr, value = self.attributes[i]
     68                other_attr, other_value = element.attributes[i]
     69                if value is None:
     70                    value = attr
     71                if other_value is None:
     72                    other_value = other_attr
     73                if attr != other_attr or value != other_value:
     74                    return False
     75        if self.children != element.children:
     76            return False
     77        return True
     78
     79    def __ne__(self, element):
     80        return not self.__eq__(element)
     81
     82    def _count(self, element, count=True):
     83        if not isinstance(element, basestring):
     84            if self == element:
     85                return 1
     86        i = 0
     87        for child in self.children:
     88            # if child and element are both text content, do a simple
     89            # "text" in "text" check
     90            if isinstance(child, basestring):
     91                if isinstance(element, basestring):
     92                    if count:
     93                        i += child.count(element)
     94                    elif element in child:
     95                        return 1
     96            else:
     97                i += child._count(element, count=count)
     98                if not count and i:
     99                    return i
     100        return i
     101
     102    def __contains__(self, element):
     103        return self._count(element, count=False) > 0
     104
     105    def count(self, element):
     106        return self._count(element, count=True)
     107
     108    def __getitem__(self, key):
     109        return self.children[key]
     110
     111    def __unicode__(self):
     112        output = u'<%s' % self.name
     113        for key, value in self.attributes:
     114            if value:
     115                output += u' %s="%s"' % (key, value)
     116            else:
     117                output += u' %s' % key
     118        if self.children:
     119            output += u'>\n'
     120            output += u''.join(unicode(c) for c in self.children)
     121            output += u'\n</%s>' % self.name
     122        else:
     123            output += u' />'
     124        return output
     125
     126    def __str__(self):
     127        return str(unicode(self))
     128
     129    def __repr__(self):
     130        return unicode(self)
     131
     132
     133class RootElement(Element):
     134    def __init__(self):
     135        super(RootElement, self).__init__(None, ())
     136
     137    def __unicode__(self):
     138        return u''.join(unicode(c) for c in self.children)
     139
     140
     141class Parser(HTMLParser):
     142    SELF_CLOSING_TAGS = ('br' , 'hr', 'input', 'img', 'meta', 'spacer',
     143        'link', 'frame', 'base', 'col')
     144
     145    def __init__(self):
     146        HTMLParser.__init__(self)
     147        self.root = RootElement()
     148        self.open_tags = []
     149        self.element_positions = {}
     150
     151    def error(self, msg):
     152        raise HTMLParseError(msg, self.getpos())
     153
     154    def format_position(self, position=None, element=None):
     155        if not position and element:
     156            position = self.element_positions[element]
     157        if position is None:
     158            position = self.getpos()
     159        if hasattr(position, 'lineno'):
     160            position = position.lineno, position.offset
     161        return 'Line %d, Column %d' % position
     162
     163    @property
     164    def current(self):
     165        if self.open_tags:
     166            return self.open_tags[-1]
     167        else:
     168            return self.root
     169
     170    def handle_startendtag(self, tag, attrs):
     171        self.handle_starttag(tag, attrs)
     172        if tag not in self.SELF_CLOSING_TAGS:
     173            self.handle_endtag(tag)
     174
     175    def handle_starttag(self, tag, attrs):
     176        element = Element(tag, attrs)
     177        self.current.append(element)
     178        if tag not in self.SELF_CLOSING_TAGS:
     179            self.open_tags.append(element)
     180        self.element_positions[element] = self.getpos()
     181
     182    def handle_endtag(self, tag):
     183        if not self.open_tags:
     184            self.error("Unexpected end tag `%s` (%s)" % (
     185                tag, self.format_position()))
     186        element = self.open_tags.pop()
     187        while element.name != tag:
     188            if not self.open_tags:
     189                self.error("Unexpected end tag `%s` (%s)" % (
     190                    tag, self.format_position()))
     191            element = self.open_tags.pop()
     192
     193    def handle_data(self, data):
     194        self.current.append(data)
     195
     196    def handle_charref(self, name):
     197        self.current.append('&%s;' % name)
     198
     199    def handle_entityref(self, name):
     200        self.current.append('&%s;' % name)
     201
     202
     203def parse_html(html):
     204    '''
     205    Takes a string that contains *valid* HTML and turns it into a Python
     206    object structure that can be easily compared against other HTML for
     207    semantic equivalence. Syntactical differences, like which quotation marks
     208    are used around attribute values, will be ignored.
     209    '''
     210    parser = Parser()
     211    parser.feed(html)
     212    parser.close()
     213    document = parser.root
     214    document.finalize()
     215    # Removing ROOT element if it's not necessary
     216    if len(document.children) == 1:
     217        if not isinstance(document.children[0], basestring):
     218            document = document.children[0]
     219    return document
  • django/test/testcases.py

    diff --git a/django/test/testcases.py b/django/test/testcases.py
    index 53ea02a..e009347 100644
    a b  
    11from __future__ import with_statement
    22
     3import difflib
    34import os
    45import re
    56import sys
    from django.forms.fields import CharField 
    2829from django.http import QueryDict
    2930from django.test import _doctest as doctest
    3031from django.test.client import Client
     32from django.test.html import HTMLParseError, parse_html
    3133from django.test.utils import (get_warnings_state, restore_warnings_state,
    3234    override_settings)
    3335from django.utils import simplejson, unittest as ut2
    3436from django.utils.encoding import smart_str, force_unicode
     37from django.utils.unittest.util import safe_repr
    3538from django.views.static import serve
    3639
    3740__all__ = ('DocTestRunner', 'OutputChecker', 'TestCase', 'TransactionTestCase',
    def restore_transaction_methods(): 
    7578    transaction.leave_transaction_management = real_leave_transaction_management
    7679    transaction.managed = real_managed
    7780
     81
     82def assert_and_parse_html(self, html, user_msg, msg):
     83    try:
     84        dom = parse_html(html)
     85    except HTMLParseError, e:
     86        standardMsg = u'%s\n%s' % (msg, e.msg)
     87        self.fail(self._formatMessage(user_msg, standardMsg))
     88    return dom
     89
     90
    7891class OutputChecker(doctest.OutputChecker):
    7992    def check_output(self, want, got, optionflags):
    8093        """
    class SimpleTestCase(ut2.TestCase): 
    348361            self.assertTrue(isinstance(fieldclass(*field_args, **field_kwargs),
    349362                                       fieldclass))
    350363
     364    def assertHTMLEqual(self, html1, html2, msg=None):
     365        """
     366        Asserts that two HTML snippets are semantically the same: whitespace is
     367        ignored in most cases and attribute ordering is not significant. The
     368        passed-in arguments must be valid HTML.
     369        """
     370        dom1 = assert_and_parse_html(self, html1, msg,
     371            u'First argument is no valid html:')
     372        dom2 = assert_and_parse_html(self, html2, msg,
     373            u'Second argument is no valid html:')
     374
     375        if dom1 != dom2:
     376            standardMsg = '%s != %s' % (safe_repr(dom1, True), safe_repr(dom2, True))
     377            diff = ('\n' + '\n'.join(difflib.ndiff(
     378                           unicode(dom1).splitlines(),
     379                           unicode(dom2).splitlines())))
     380            standardMsg = self._truncateMessage(standardMsg, diff)
     381            self.fail(self._formatMessage(msg, standardMsg))
     382
     383    def assertHTMLNotEqual(self, html1, html2, msg=None):
     384        dom1 = assert_and_parse_html(self, html1, msg,
     385            u'First argument is no valid html')
     386        dom2 = assert_and_parse_html(self, html2, msg,
     387            u'Second argument is no valid html')
     388
     389        if not dom1 != dom2:
     390            standardMsg = '%s == %s' % (safe_repr(dom1, True), safe_repr(dom2, True))
     391            self.fail(self._formatMessage(msg, standardMsg))
     392
    351393
    352394class TransactionTestCase(SimpleTestCase):
    353395    # The class we'll use for the test client self.client.
    class TransactionTestCase(SimpleTestCase): 
    506548                (url, expected_url))
    507549
    508550    def assertContains(self, response, text, count=None, status_code=200,
    509                        msg_prefix=''):
     551                       msg_prefix='', html=False):
    510552        """
    511553        Asserts that a response indicates that some content was retrieved
    512554        successfully, (i.e., the HTTP status code was as expected), and that
    class TransactionTestCase(SimpleTestCase): 
    528570            msg_prefix + "Couldn't retrieve content: Response code was %d"
    529571            " (expected %d)" % (response.status_code, status_code))
    530572        text = smart_str(text, response._charset)
    531         real_count = response.content.count(text)
     573        content = response.content
     574        if html:
     575            content = assert_and_parse_html(self, content, None,
     576                u'Response\'s content is no valid html:')
     577            text = assert_and_parse_html(self, text, None,
     578                u'Second argument is no valid html:')
     579        real_count = content.count(text)
    532580        if count is not None:
    533581            self.assertEqual(real_count, count,
    534582                msg_prefix + "Found %d instances of '%s' in response"
    class TransactionTestCase(SimpleTestCase): 
    538586                msg_prefix + "Couldn't find '%s' in response" % text)
    539587
    540588    def assertNotContains(self, response, text, status_code=200,
    541                           msg_prefix=''):
     589                          msg_prefix='', html=False):
    542590        """
    543591        Asserts that a response indicates that some content was retrieved
    544592        successfully, (i.e., the HTTP status code was as expected), and that
    class TransactionTestCase(SimpleTestCase): 
    558606            msg_prefix + "Couldn't retrieve content: Response code was %d"
    559607            " (expected %d)" % (response.status_code, status_code))
    560608        text = smart_str(text, response._charset)
    561         self.assertEqual(response.content.count(text), 0,
     609        content = response.content
     610        if html:
     611            content = assert_and_parse_html(self, content, None,
     612                u'Response\'s content is no valid html:')
     613            text = assert_and_parse_html(self, text, None,
     614                u'Second argument is no valid html:')
     615        self.assertEqual(content.count(text), 0,
    562616            msg_prefix + "Response should not contain '%s'" % text)
    563617
    564618    def assertFormError(self, response, form, field, errors, msg_prefix=''):
    class TransactionTestCase(SimpleTestCase): 
    612666            self.fail(msg_prefix + "The form '%s' was not used to render the"
    613667                      " response" % form)
    614668
    615     def assertTemplateUsed(self, response, template_name, msg_prefix=''):
     669    def assertTemplateUsed(self, response=None, template_name=None, msg_prefix=''):
    616670        """
    617671        Asserts that the template with the provided name was used in rendering
    618         the response.
     672        the response. Also usable as a context manager.
    619673        """
    620674        if msg_prefix:
    621675            msg_prefix += ": "
    class TransactionTestCase(SimpleTestCase): 
    628682            " the response. Actual template(s) used: %s" %
    629683                (template_name, u', '.join(template_names)))
    630684
    631     def assertTemplateNotUsed(self, response, template_name, msg_prefix=''):
     685    def assertTemplateNotUsed(self, response=None, template_name=None, msg_prefix=''):
    632686        """
    633687        Asserts that the template with the provided name was NOT used in
    634         rendering the response.
     688        rendering the response. Also usable as a context manager.
    635689        """
    636690        if msg_prefix:
    637691            msg_prefix += ": "
  • new file django/utils/htmlparser.py

    diff --git a/django/utils/htmlparser.py b/django/utils/htmlparser.py
    new file mode 100644
    index 0000000..ed743f5
    - +  
     1import HTMLParser as _HTMLParser
     2
     3
     4class HTMLParser(_HTMLParser.HTMLParser):
     5    """
     6    Patched version of stdlib's HTMLParser with patch from:
     7    http://bugs.python.org/issue670664
     8    """
     9    def __init__(self):
     10        _HTMLParser.HTMLParser.__init__(self)
     11        self.cdata_tag = None
     12
     13    def set_cdata_mode(self, tag):
     14        self.interesting = _HTMLParser.interesting_cdata
     15        self.cdata_tag = tag.lower()
     16
     17    def clear_cdata_mode(self):
     18        self.interesting = _HTMLParser.interesting_normal
     19        self.cdata_tag = None
     20
     21    # Internal -- handle starttag, return end or -1 if not terminated
     22    def parse_starttag(self, i):
     23        self.__starttag_text = None
     24        endpos = self.check_for_whole_start_tag(i)
     25        if endpos < 0:
     26            return endpos
     27        rawdata = self.rawdata
     28        self.__starttag_text = rawdata[i:endpos]
     29
     30        # Now parse the data between i+1 and j into a tag and attrs
     31        attrs = []
     32        match = _HTMLParser.tagfind.match(rawdata, i + 1)
     33        assert match, 'unexpected call to parse_starttag()'
     34        k = match.end()
     35        self.lasttag = tag = rawdata[i + 1:k].lower()
     36
     37        while k < endpos:
     38            m = _HTMLParser.attrfind.match(rawdata, k)
     39            if not m:
     40                break
     41            attrname, rest, attrvalue = m.group(1, 2, 3)
     42            if not rest:
     43                attrvalue = None
     44            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
     45                 attrvalue[:1] == '"' == attrvalue[-1:]:
     46                attrvalue = attrvalue[1:-1]
     47                attrvalue = self.unescape(attrvalue)
     48            attrs.append((attrname.lower(), attrvalue))
     49            k = m.end()
     50
     51        end = rawdata[k:endpos].strip()
     52        if end not in (">", "/>"):
     53            lineno, offset = self.getpos()
     54            if "\n" in self.__starttag_text:
     55                lineno = lineno + self.__starttag_text.count("\n")
     56                offset = len(self.__starttag_text) \
     57                         - self.__starttag_text.rfind("\n")
     58            else:
     59                offset = offset + len(self.__starttag_text)
     60            self.error("junk characters in start tag: %r"
     61                       % (rawdata[k:endpos][:20],))
     62        if end.endswith('/>'):
     63            # XHTML-style empty tag: <span attr="value" />
     64            self.handle_startendtag(tag, attrs)
     65        else:
     66            self.handle_starttag(tag, attrs)
     67            if tag in self.CDATA_CONTENT_ELEMENTS:
     68                self.set_cdata_mode(tag) # <--------------------------- Changed
     69        return endpos
     70
     71    # Internal -- parse endtag, return end or -1 if incomplete
     72    def parse_endtag(self, i):
     73        rawdata = self.rawdata
     74        assert rawdata[i:i + 2] == "</", "unexpected call to parse_endtag"
     75        match = _HTMLParser.endendtag.search(rawdata, i + 1) # >
     76        if not match:
     77            return -1
     78        j = match.end()
     79        match = _HTMLParser.endtagfind.match(rawdata, i) # </ + tag + >
     80        if not match:
     81            if self.cdata_tag is not None: # *** add ***
     82                self.handle_data(rawdata[i:j]) # *** add ***
     83                return j # *** add ***
     84            self.error("bad end tag: %r" % (rawdata[i:j],))
     85        # --- changed start ---------------------------------------------------
     86        tag = match.group(1).strip()
     87        if self.cdata_tag is not None:
     88            if tag.lower() != self.cdata_tag:
     89                self.handle_data(rawdata[i:j])
     90                return j
     91        # --- changed end -----------------------------------------------------
     92        self.handle_endtag(tag.lower())
     93        self.clear_cdata_mode()
     94        return j
  • docs/releases/1.4.txt

    diff --git a/docs/releases/1.4.txt b/docs/releases/1.4.txt
    index 9b3c219..a2d7333 100644
    a b Time zone support is enabled by default in new projects created with 
    475475:djadmin:`startproject`. If you want to use this feature in an existing
    476476project, read the :ref:`migration guide <time-zones-migration-guide>`.
    477477
     478HTML comparisons in tests
     479~~~~~~~~~~~~~~~~~~~~~~~~~
     480
     481The :class:`~django.test.testcase.TestCase` base class now has some helpers to
     482compare HTML roughly without tripping over trivial differences in whitespace,
     483attribute quoting and ordering, and closing of self-closing tags. You can
     484either compare HTML directly with the new
     485:meth:`~django.test.testcase.TestCase.assertHTMLEqual` and
     486:meth:`~django.test.testcase.TestCase.assertHTMLNotEqual` assertions, or use
     487the ``html=True`` flag with
     488:meth:`~django.test.testcase.TestCase.assertContains` and
     489:meth:`~django.test.testcase.TestCase.assertNotContains` to test if the test
     490client's response contains a given HTML fragment. See the :ref:`assertion
     491documentation<assertions>` for more information.
     492
    478493Minor features
    479494~~~~~~~~~~~~~~
    480495
  • docs/topics/testing.txt

    diff --git a/docs/topics/testing.txt b/docs/topics/testing.txt
    index ea2c52b..64fa329 100644
    a b your test suite. 
    15421542        self.assertFieldOutput(EmailField, {'a@a.com': 'a@a.com'}, {'aaa': [u'Enter a valid e-mail address.']})
    15431543
    15441544
    1545 .. method:: TestCase.assertContains(response, text, count=None, status_code=200, msg_prefix='')
     1545.. method:: TestCase.assertContains(response, text, count=None, status_code=200, msg_prefix='', html=False)
    15461546
    15471547    Asserts that a ``Response`` instance produced the given ``status_code`` and
    15481548    that ``text`` appears in the content of the response. If ``count`` is
    15491549    provided, ``text`` must occur exactly ``count`` times in the response.
    15501550
    1551 .. method:: TestCase.assertNotContains(response, text, status_code=200, msg_prefix='')
     1551    .. versionadded:: 1.4
     1552
     1553    Set ``html`` to ``True`` to handle ``text`` as HTML. The comparison with
     1554    the response content will be based on HTML semantics instead of
     1555    char-by-char equality. Whitespace is ignored in most cases, and attribute
     1556    ordering is not significant. See :meth:`~TestCase.assertHTMLEqual` for
     1557    more details.
     1558
     1559.. method:: TestCase.assertNotContains(response, text, status_code=200, msg_prefix='', html=False)
    15521560
    15531561    Asserts that a ``Response`` instance produced the given ``status_code`` and
    15541562    that ``text`` does not appear in the content of the response.
    15551563
     1564    .. versionadded:: 1.4
     1565
     1566    Set ``html`` to ``True`` to handle ``text`` as HTML. The comparison with
     1567    the response content will be based on HTML semantics instead of
     1568    char-by-char equality. Whitespace is ignored in most cases, and attribute
     1569    ordering is not significant. See :meth:`~TestCase.assertHTMLEqual` for
     1570    more details.
     1571
    15561572.. method:: TestCase.assertFormError(response, form, field, errors, msg_prefix='')
    15571573
    15581574    Asserts that a field on a form raises the provided list of errors when
    your test suite. 
    16371653            Person.objects.create(name="Aaron")
    16381654            Person.objects.create(name="Daniel")
    16391655
     1656.. method:: TestCase.assertHTMLEqual(html1, html2, msg=None)
     1657
     1658    .. versionadded:: 1.4
     1659
     1660    Asserts that the strings ``html1`` and ``html2`` are equal. The comparison
     1661    is based on HTML semantics. The comparison takes the following things into
     1662    account:
     1663
     1664    * Whitespace before and after HTML tags is ignored
     1665    * All types of whitespace are considered equal
     1666    * All open tags are closed implicitly, i.e. when a surrounding tag is
     1667      closed or the HTML document ends
     1668    * Empty tags are equal to their self-closing version
     1669    * The ordering of attributes inside of an HTML element is not significant
     1670    * Attributes without a value are equal to attributes whose value equals
     1671      their name (see the examples)
     1672
     1673    The following examples are valid tests and don't raise any
     1674    ``AssertionError``::
     1675
     1676        self.assertHTMLEqual('<p>Hello <b>world!</p>',
     1677            '''<p>
     1678                Hello   <b>world! </b>
     1679            </p>''')
     1680        self.assertHTMLEqual(
     1681            '<input type="checkbox" checked="checked" id="id_accept_terms" />',
     1682            '<input id="id_accept_terms" type="checkbox" checked>')
     1683
     1684    ``html1`` and ``html2`` must be valid HTML. An ``AssertionError`` will be
     1685    raised if one of them cannot be parsed.
     1686
     1687.. method:: TestCase.assertHTMLNotEqual(html1, html2, msg=None)
     1688
     1689    .. versionadded:: 1.4
     1690
     1691    Asserts that the strings ``html1`` and ``html2`` are *not* equal. The
     1692    comparison is based on HTML semantics. See
     1693    :func:`~TestCase.assertHTMLEqual` for details.
     1694
     1695    ``html1`` and ``html2`` must be valid HTML. An ``AssertionError`` will be
     1696    raised if one of them cannot be parsed.
     1697
    16401698
    16411699.. _topics-testing-email:
    16421700
  • tests/regressiontests/test_utils/tests.py

    diff --git a/tests/regressiontests/test_utils/tests.py b/tests/regressiontests/test_utils/tests.py
    index eab6895..f3ad43a 100644
    a b  
    11from __future__ import with_statement, absolute_import
    22
    33from django.forms import EmailField, IntegerField
     4from django.http import HttpResponse
     5from django.template.loader import render_to_string
    46from django.test import SimpleTestCase, TestCase, skipUnlessDBFeature
    57from django.utils.unittest import skip
    68
    class SaveRestoreWarningState(TestCase): 
    117119        self.restore_warnings_state()
    118120
    119121
     122class HTMLEqualTests(TestCase):
     123    def test_html_parser(self):
     124        from django.test.html import parse_html
     125        element = parse_html('<div><p>Hello</p></div>')
     126        self.assertEqual(len(element.children), 1)
     127        self.assertEqual(element.children[0].name, 'p')
     128        self.assertEqual(element.children[0].children[0], 'Hello')
     129
     130        parse_html('<p>')
     131        parse_html('<p attr>')
     132        dom = parse_html('<p>foo')
     133        self.assertEqual(len(dom.children), 1)
     134        self.assertEqual(dom.name, 'p')
     135        self.assertEqual(dom[0], 'foo')
     136
     137    def test_parse_html_in_script(self):
     138        from django.test.html import parse_html
     139        parse_html('<script>var a = "<p" + ">";</script>');
     140        parse_html('''
     141            <script>
     142            var js_sha_link='<p>***</p>';
     143            </script>
     144        ''')
     145
     146        # script content will be parsed to text
     147        dom = parse_html('''
     148            <script><p>foo</p> '</scr'+'ipt>' <span>bar</span></script>
     149        ''')
     150        self.assertEqual(len(dom.children), 1)
     151        self.assertEqual(dom.children[0], "<p>foo</p> '</scr'+'ipt>' <span>bar</span>")
     152
     153    def test_self_closing_tags(self):
     154        from django.test.html import parse_html
     155
     156        self_closing_tags = ('br' , 'hr', 'input', 'img', 'meta', 'spacer',
     157            'link', 'frame', 'base', 'col')
     158        for tag in self_closing_tags:
     159            dom = parse_html('<p>Hello <%s> world</p>' % tag)
     160            self.assertEqual(len(dom.children), 3)
     161            self.assertEqual(dom[0], 'Hello')
     162            self.assertEqual(dom[1].name, tag)
     163            self.assertEqual(dom[2], 'world')
     164
     165            dom = parse_html('<p>Hello <%s /> world</p>' % tag)
     166            self.assertEqual(len(dom.children), 3)
     167            self.assertEqual(dom[0], 'Hello')
     168            self.assertEqual(dom[1].name, tag)
     169            self.assertEqual(dom[2], 'world')
     170
     171    def test_simple_equal_html(self):
     172        self.assertHTMLEqual('', '')
     173        self.assertHTMLEqual('<p></p>', '<p></p>')
     174        self.assertHTMLEqual('<p></p>', ' <p> </p> ')
     175        self.assertHTMLEqual(
     176            '<div><p>Hello</p></div>',
     177            '<div><p>Hello</p></div>')
     178        self.assertHTMLEqual(
     179            '<div><p>Hello</p></div>',
     180            '<div> <p>Hello</p> </div>')
     181        self.assertHTMLEqual(
     182            '<div>\n<p>Hello</p></div>',
     183            '<div><p>Hello</p></div>\n')
     184        self.assertHTMLEqual(
     185            '<div><p>Hello\nWorld !</p></div>',
     186            '<div><p>Hello World\n!</p></div>')
     187        self.assertHTMLEqual(
     188            '<div><p>Hello\nWorld !</p></div>',
     189            '<div><p>Hello World\n!</p></div>')
     190        self.assertHTMLEqual(
     191            '<p>Hello  World   !</p>',
     192            '<p>Hello World\n\n!</p>')
     193        self.assertHTMLEqual('<p> </p>', '<p></p>')
     194        self.assertHTMLEqual('<p/>', '<p></p>')
     195        self.assertHTMLEqual('<p />', '<p></p>')
     196        self.assertHTMLEqual('<input checked>', '<input checked="checked">')
     197        self.assertHTMLEqual('<p>Hello', '<p> Hello')
     198        self.assertHTMLEqual('<p>Hello</p>World', '<p>Hello</p> World')
     199
     200    def test_ignore_comments(self):
     201        self.assertHTMLEqual(
     202            '<div>Hello<!-- this is a comment --> World!</div>',
     203            '<div>Hello World!</div>')
     204
    def test_unequal_html(self):
        """Check pairs that assertHTMLNotEqual must report as different.

        The pairs cover differing text and character/entity references
        compared against each other and against look-alike plain text.
        """
        # NOTE(review): ``&#20;`` is the decimal reference for code point 20,
        # not a space (``&#32;`` / ``&#x20;``) -- confirm which was intended.
        differing_pairs = (
            ('<p>Hello</p>', '<p>Hello!</p>'),
            ('<p>foo&#20;bar</p>', '<p>foo&nbsp;bar</p>'),
            ('<p>foo bar</p>', '<p>foo &nbsp;bar</p>'),
            ('<p>foo nbsp</p>', '<p>foo &nbsp;</p>'),
            ('<p>foo #20</p>', '<p>foo &#20;</p>'),
        )
        for first, second in differing_pairs:
            self.assertHTMLNotEqual(first, second)
     211
    def test_attributes(self):
        """Attribute order and quote style are ignored; attribute values are not."""
        reference = '<input id="id_name" type="text" />'
        equivalent_spellings = (
            # Same attributes, different order.
            '<input type="text" id="id_name" />',
            # Different order plus single-quoted value.
            '''<input type='text' id="id_name" />''',
        )
        for spelling in equivalent_spellings:
            self.assertHTMLEqual(spelling, reference)

        # A different value for the same attribute must be detected.
        text_input = '<input type="text" id="id_name" />'
        password_input = '<input type="password" id="id_name" />'
        self.assertHTMLNotEqual(text_input, password_input)
     222
     223    def test_complex_examples(self):
     224        self.assertHTMLEqual(
     225        """<tr><th><label for="id_first_name">First name:</label></th>
     226<td><input type="text" name="first_name" value="John" id="id_first_name" /></td></tr>
     227<tr><th><label for="id_last_name">Last name:</label></th>
     228<td><input type="text" id="id_last_name" name="last_name" value="Lennon" /></td></tr>
     229<tr><th><label for="id_birthday">Birthday:</label></th>
     230<td><input type="text" value="1940-10-9" name="birthday" id="id_birthday" /></td></tr>""",
     231        """
     232        <tr><th>
     233            <label for="id_first_name">First name:</label></th><td><input type="text" name="first_name" value="John" id="id_first_name" />
     234        </td></tr>
     235        <tr><th>
     236            <label for="id_last_name">Last name:</label></th><td><input type="text" name="last_name" value="Lennon" id="id_last_name" />
     237        </td></tr>
     238        <tr><th>
     239            <label for="id_birthday">Birthday:</label></th><td><input type="text" name="birthday" value="1940-10-9" id="id_birthday" />
     240        </td></tr>
     241        """)
     242
     243        self.assertHTMLEqual(
     244        """<!DOCTYPE html>
     245        <html>
     246        <head>
     247            <link rel="stylesheet">
     248            <title>Document</title>
     249            <meta attribute="value">
     250        </head>
     251        <body>
     252            <p>
     253            This is a valid paragraph
     254            <div> this is a div AFTER the p</div>
     255        </body>
     256        </html>""", """
     257        <html>
     258        <head>
     259            <link rel="stylesheet">
     260            <title>Document</title>
     261            <meta attribute="value">
     262        </head>
     263        <body>
     264            <p> This is a valid paragraph
     265            <!-- browsers would close the p tag here -->
     266            <div> this is a div AFTER the p</div>
     267            </p> <!-- this is invalid html parsing however it should make no
     268            difference in most cases -->
     269        </body>
     270        </html>""")
     271
    def test_html_contain(self):
        """Exercise the ``in`` operator on parsed HTML.

        ``assertIn`` / ``assertNotIn`` perform the same membership test as
        ``assertTrue(a in b)`` but report both operands when they fail.
        """
        from django.test.html import parse_html
        # equal html contains each other
        dom1 = parse_html('<p>foo')
        dom2 = parse_html('<p>foo</p>')
        self.assertIn(dom1, dom2)
        self.assertIn(dom2, dom1)

        # an element is contained in its wrapper, but not the other way round
        dom2 = parse_html('<div><p>foo</p></div>')
        self.assertIn(dom1, dom2)
        self.assertNotIn(dom2, dom1)

        # a plain string is expected to match text ('foo'), not markup
        self.assertNotIn('<p>foo</p>', dom2)
        self.assertIn('foo', dom2)

        # when a root element is used ...
        dom1 = parse_html('<p>foo</p><p>bar</p>')
        dom2 = parse_html('<p>foo</p><p>bar</p>')
        self.assertIn(dom1, dom2)
        dom1 = parse_html('<p>foo</p>')
        self.assertIn(dom1, dom2)
        dom1 = parse_html('<p>bar</p>')
        self.assertIn(dom1, dom2)
     295
    def test_count(self):
        """Exercise ``count()`` on parsed HTML with element and string needles."""
        from django.test.html import parse_html

        # equal html contains each other one time
        paragraph = parse_html('<p>foo')
        closed_paragraph = parse_html('<p>foo</p>')
        self.assertEqual(paragraph.count(closed_paragraph), 1)
        self.assertEqual(closed_paragraph.count(paragraph), 1)

        siblings = parse_html('<p>foo</p><p>bar</p>')
        self.assertEqual(siblings.count(paragraph), 1)

        # every occurrence of a string in the text is expected to be counted
        repeated_text = parse_html('<p>foo foo</p><p>foo</p>')
        self.assertEqual(repeated_text.count('foo'), 3)

        # attribute values, attribute names and tag names are expected not to
        # be counted for string needles; characters of the text are
        with_attribute = parse_html('<p class="bar">foo</p>')
        string_expectations = (('bar', 0), ('class', 0), ('p', 0), ('o', 2))
        for needle, expected in string_expectations:
            self.assertEqual(with_attribute.count(needle), expected)

        # expected number of matches of the <p>foo</p> element per document
        element_expectations = (
            ('<p>foo</p><p>foo</p>', 2),
            ('<div><p>foo<input type=""></p><p>foo</p></div>', 1),
            ('<div><div><p>foo</p></div></div>', 1),
            ('<p>foo<p>foo</p></p>', 1),
            ('<p>foo<p>bar</p></p>', 0),
        )
        for markup, expected in element_expectations:
            document = parse_html(markup)
            self.assertEqual(document.count(paragraph), expected)
     330
    def test_parsing_errors(self):
        """Check the errors raised for unequal and for unparsable markup."""
        from django.test.html import HTMLParseError, parse_html

        # An element compared with the empty document fails the assertion,
        # whichever side it is on.
        for first, second in (('<p>', ''), ('', '<p>')):
            with self.assertRaises(AssertionError):
                self.assertHTMLEqual(first, second)

        # A stray end tag and an unterminated comment are parse errors.
        for broken_markup in ('</p>', '<!--'):
            with self.assertRaises(HTMLParseError):
                parse_html(broken_markup)
     341
    def test_contains_html(self):
        """Check ``assertContains`` / ``assertNotContains`` with ``html=True``.

        The same needles give opposite results in plain-text mode and in HTML
        mode. The malformed-markup checks also pass ``html=True``; without it
        they are a plain substring search and raise only because the text is
        absent, never involving HTML handling.
        """
        response = HttpResponse('''<body>
        This is a form: <form action="" method="get">
            <input type="text" name="Hello" />
        </form></body>''')

        # Plain-text mode: only literal substrings match.
        self.assertNotContains(response, "<input name='Hello' type='text'>")
        self.assertContains(response, '<form action="" method="get">')

        # HTML mode: the reordered, re-quoted <input> matches, while the bare
        # <form> start tag no longer does.
        self.assertContains(response, "<input name='Hello' type='text'>", html=True)
        self.assertNotContains(response, '<form action="" method="get">', html=True)

        invalid_response = HttpResponse('''<body <bad>>''')

        # Malformed response content in HTML mode.
        with self.assertRaises(AssertionError):
            self.assertContains(invalid_response, '<p></p>', html=True)

        # Malformed needle in HTML mode.
        with self.assertRaises(AssertionError):
            self.assertContains(response, '<p "whats" that>', html=True)
     361
     362
    120363class SkippingExtraTests(TestCase):
    121364    fixtures = ['should_not_be_loaded.json']
    122365
Back to Top