Commit 844a24bb authored by Carl Meyer's avatar Carl Meyer
Browse files

Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and...

Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37
parent c82f1dcf
Loading
Loading
Loading
Loading

django/test/html.py

0 → 100644
+221 −0
Original line number Diff line number Diff line
"""
Comparing two html documents.
"""
import re
from HTMLParser import HTMLParseError
from django.utils.encoding import force_unicode
from django.utils.htmlparser import HTMLParser


# Raw string: '\s' is not a valid string escape, so the pattern should not
# depend on Python passing the unknown escape through untouched.
WHITESPACE = re.compile(r'\s+')


def normalize_whitespace(string):
    """
    Returns ``string`` with every run of whitespace characters (spaces, tabs,
    newlines, ...) collapsed into a single space. Leading and trailing
    whitespace is collapsed as well, not removed.
    """
    return WHITESPACE.sub(' ', string)


class Element(object):
    """
    A node of the simplified DOM tree used to compare HTML documents.

    ``name`` is the tag name, ``attributes`` is a sorted list of
    ``(name, value)`` pairs (``value`` is ``None`` for attributes written
    without a value) and ``children`` is a list whose items are either text
    strings or nested elements.
    """
    def __init__(self, name, attributes):
        self.name = name
        # Sorting makes the attribute order irrelevant when comparing.
        self.attributes = sorted(attributes)
        self.children = []

    def append(self, element):
        """
        Adds ``element`` (a text string or a child element) to the children.

        Text is converted to unicode and whitespace-normalized; text that
        directly follows other text is merged into the previous child.
        """
        if isinstance(element, basestring):
            element = force_unicode(element)
            element = normalize_whitespace(element)
            if self.children and isinstance(self.children[-1], basestring):
                # Merge adjacent text nodes and re-normalize the seam.
                self.children[-1] = normalize_whitespace(
                    self.children[-1] + element)
                return
        elif self.children:
            # removing last children if it is only whitespace
            # this can result in incorrect dom representations since
            # whitespace between inline tags like <span> is significant
            last = self.children[-1]
            if isinstance(last, basestring) and last.isspace():
                self.children.pop()
        # Empty text is never stored.
        if element:
            self.children.append(element)

    def finalize(self):
        """
        Strips surrounding whitespace from all text children, drops trailing
        text children that become empty, and finalizes child elements
        recursively.
        """
        children = self.children
        # Right-strip the trailing text child; if nothing is left of it,
        # remove it and repeat with the new last child.
        while children and isinstance(children[-1], basestring):
            children[-1] = children[-1].rstrip()
            if children[-1]:
                break
            children.pop()
        for i, child in enumerate(children):
            if isinstance(child, basestring):
                children[i] = child.strip()
            elif hasattr(child, 'finalize'):
                child.finalize()

    def __eq__(self, element):
        """
        Two elements are equal when their names, attributes and children are
        equal. Anything without a ``name`` attribute (e.g. plain text) is
        never equal to an element.
        """
        if not hasattr(element, 'name'):
            return False
        if self.name != element.name:
            return False
        if len(self.attributes) != len(element.attributes):
            return False
        if self.attributes != element.attributes:
            # attributes without a value is same as attribute with value that
            # equals the attributes name:
            # <input checked> == <input checked="checked">
            pairs = zip(self.attributes, element.attributes)
            for (attr, value), (other_attr, other_value) in pairs:
                if value is None:
                    value = attr
                if other_value is None:
                    other_value = other_attr
                if attr != other_attr or value != other_value:
                    return False
        if self.children != element.children:
            return False
        return True

    def __ne__(self, element):
        return not self.__eq__(element)

    def _count(self, element, count=True):
        """
        Counts the occurrences of ``element`` in this subtree.

        If ``element`` is not text and equals this element the result is 1
        and the children are not searched. Otherwise text is counted as
        substring occurrences inside text children, and child elements are
        searched recursively. With ``count=False`` the search stops at the
        first hit and only the truthiness of the result is meaningful.
        """
        if not isinstance(element, basestring):
            if self == element:
                return 1
        i = 0
        for child in self.children:
            # child is text content and element is also text content, then
            # make a simple "text" in "text"
            if isinstance(child, basestring):
                if isinstance(element, basestring):
                    if count:
                        i += child.count(element)
                    elif element in child:
                        return 1
            else:
                i += child._count(element, count=count)
                if not count and i:
                    return i
        return i

    def __contains__(self, element):
        return self._count(element, count=False) > 0

    def count(self, element):
        """
        Returns how often ``element`` (text or element) occurs in this
        subtree; see ``_count`` for the exact rules.
        """
        return self._count(element, count=True)

    def __getitem__(self, key):
        return self.children[key]

    def __unicode__(self):
        output = u'<%s' % self.name
        for key, value in self.attributes:
            # Attributes with a None or empty value are rendered bare.
            if value:
                output += u' %s="%s"' % (key, value)
            else:
                output += u' %s' % key
        if self.children:
            output += u'>\n'
            output += u''.join(unicode(c) for c in self.children)
            output += u'\n</%s>' % self.name
        else:
            output += u' />'
        return output

    def __repr__(self):
        # NOTE(review): this returns a unicode object rather than a byte
        # string -- confirm callers cope with non-ASCII content.
        return unicode(self)


class RootElement(Element):
    """
    Container for the top-level nodes of a parsed document. It has no tag
    name and no attributes of its own.
    """
    def __init__(self):
        Element.__init__(self, None, ())

    def __unicode__(self):
        # The root has no markup of its own: render only the children.
        rendered = [unicode(child) for child in self.children]
        return u''.join(rendered)


class Parser(HTMLParser):
    """
    HTML parser that builds a tree of elements below ``self.root``.

    ``open_tags`` is the stack of elements that have been opened but not yet
    closed; ``element_positions`` remembers the parser position at which
    each element's start tag was handled.
    """
    # Tags that never get pushed on the open-tag stack because they cannot
    # have content.
    SELF_CLOSING_TAGS = ('br', 'hr', 'input', 'img', 'meta', 'spacer',
        'link', 'frame', 'base', 'col')

    def __init__(self):
        HTMLParser.__init__(self)
        self.root = RootElement()
        self.open_tags = []
        self.element_positions = {}

    def error(self, msg):
        """Raises ``HTMLParseError`` for ``msg`` at the current position."""
        raise HTMLParseError(msg, self.getpos())

    def format_position(self, position=None, element=None):
        """
        Returns a human readable ``'Line X, Column Y'`` string.

        The position is taken from ``position`` if given, else from the
        recorded start of ``element`` if given, else from the parser's
        current position.
        """
        if not position and element:
            position = self.element_positions[element]
        if position is None:
            position = self.getpos()
        if hasattr(position, 'lineno'):
            position = position.lineno, position.offset
        return 'Line %d, Column %d' % position

    @property
    def current(self):
        """The innermost open element, or the root if none is open."""
        if self.open_tags:
            return self.open_tags[-1]
        else:
            return self.root

    def handle_startendtag(self, tag, attrs):
        # XHTML-style ``<tag />``: open the element and close it right away.
        # Self-closing tags are never put on the stack, so there is nothing
        # to close for them.
        self.handle_starttag(tag, attrs)
        if tag not in self.SELF_CLOSING_TAGS:
            self.handle_endtag(tag)

    def handle_starttag(self, tag, attrs):
        element = Element(tag, attrs)
        self.current.append(element)
        if tag not in self.SELF_CLOSING_TAGS:
            self.open_tags.append(element)
        self.element_positions[element] = self.getpos()

    def handle_endtag(self, tag):
        # Pop open elements until the one named ``tag`` is found; everything
        # opened after it is thereby closed implicitly. Running out of open
        # elements means the end tag has no matching start tag.
        if not self.open_tags:
            self.error("Unexpected end tag `%s` (%s)" % (
                tag, self.format_position()))
        element = self.open_tags.pop()
        while element.name != tag:
            if not self.open_tags:
                self.error("Unexpected end tag `%s` (%s)" % (
                    tag, self.format_position()))
            element = self.open_tags.pop()

    def handle_data(self, data):
        self.current.append(data)

    def handle_charref(self, name):
        # ``name`` is what stood between ``&#`` and ``;`` (e.g. ``39`` or
        # ``x27``), so the ``#`` has to be restored. Without it ``&#39;``
        # would be stored as ``&39;`` and become indistinguishable from that
        # literal text.
        self.current.append('&#%s;' % name)

    def handle_entityref(self, name):
        # Named references (``&amp;``) are kept verbatim, not decoded.
        self.current.append('&%s;' % name)


def parse_html(html):
    """
    Takes a string that contains *valid* HTML and turns it into a Python
    object structure that can be easily compared against other HTML on
    semantic equivalence. Syntactical differences, like which quotation is
    used on attributes, will be ignored.

    Returns the root of the structure, or its only child if that child is an
    element rather than text.
    """
    parser = Parser()
    parser.feed(html)
    parser.close()
    root = parser.root
    root.finalize()
    children = root.children
    # The ROOT wrapper is unnecessary around a single top-level element.
    if len(children) == 1 and not isinstance(children[0], basestring):
        return children[0]
    return root
+62 −4
Original line number Diff line number Diff line
from __future__ import with_statement

import difflib
import os
import re
import sys
@@ -29,12 +30,14 @@ from django.forms.fields import CharField
from django.http import QueryDict
from django.test import _doctest as doctest
from django.test.client import Client
from django.test.html import HTMLParseError, parse_html
from django.test.signals import template_rendered
from django.test.utils import (get_warnings_state, restore_warnings_state,
    override_settings)
from django.test.utils import ContextList
from django.utils import simplejson, unittest as ut2
from django.utils.encoding import smart_str, force_unicode
from django.utils.unittest.util import safe_repr
from django.views.static import serve

__all__ = ('DocTestRunner', 'OutputChecker', 'TestCase', 'TransactionTestCase',
@@ -78,6 +81,16 @@ def restore_transaction_methods():
    transaction.leave_transaction_management = real_leave_transaction_management
    transaction.managed = real_managed


def assert_and_parse_html(self, html, user_msg, msg):
    try:
        dom = parse_html(html)
    except HTMLParseError, e:
        standardMsg = u'%s\n%s' % (msg, e.msg)
        self.fail(self._formatMessage(user_msg, standardMsg))
    return dom


class OutputChecker(doctest.OutputChecker):
    def check_output(self, want, got, optionflags):
        """
@@ -396,6 +409,39 @@ class SimpleTestCase(ut2.TestCase):
            self.assertTrue(isinstance(fieldclass(*field_args, **field_kwargs),
                                       fieldclass))

    def assertHTMLEqual(self, html1, html2, msg=None):
        """
        Asserts that two HTML snippets are semantically the same, e.g.
        whitespace is ignored in most cases and the ordering of attributes
        is not significant. Both arguments must be valid HTML.

        On failure the message shows both parsed documents and a line-based
        diff of their renderings.
        """
        first = assert_and_parse_html(self, html1, msg,
            u'First argument is not valid html:')
        second = assert_and_parse_html(self, html2, msg,
            u'Second argument is not valid html:')

        if first != second:
            summary = '%s != %s' % (
                safe_repr(first, True), safe_repr(second, True))
            diff_lines = difflib.ndiff(
                unicode(first).splitlines(), unicode(second).splitlines())
            diff = '\n' + '\n'.join(diff_lines)
            self.fail(self._formatMessage(
                msg, self._truncateMessage(summary, diff)))

    def assertHTMLNotEqual(self, html1, html2, msg=None):
        """
        Asserts that two HTML snippets are semantically different. Both
        arguments must be valid HTML.
        """
        first = assert_and_parse_html(self, html1, msg,
            u'First argument is not valid html:')
        second = assert_and_parse_html(self, html2, msg,
            u'Second argument is not valid html:')

        if first == second:
            summary = '%s == %s' % (
                safe_repr(first, True), safe_repr(second, True))
            self.fail(self._formatMessage(msg, summary))


class TransactionTestCase(SimpleTestCase):
    # The class we'll use for the test client self.client.
@@ -554,7 +600,7 @@ class TransactionTestCase(SimpleTestCase):
                (url, expected_url))

    def assertContains(self, response, text, count=None, status_code=200,
                       msg_prefix=''):
                       msg_prefix='', html=False):
        """
        Asserts that a response indicates that some content was retrieved
        successfully, (i.e., the HTTP status code was as expected), and that
@@ -576,7 +622,13 @@ class TransactionTestCase(SimpleTestCase):
            msg_prefix + "Couldn't retrieve content: Response code was %d"
            " (expected %d)" % (response.status_code, status_code))
        text = smart_str(text, response._charset)
        real_count = response.content.count(text)
        content = response.content
        if html:
            content = assert_and_parse_html(self, content, None,
                u"Response's content is not valid html:")
            text = assert_and_parse_html(self, text, None,
                u"Second argument is not valid html:")
        real_count = content.count(text)
        if count is not None:
            self.assertEqual(real_count, count,
                msg_prefix + "Found %d instances of '%s' in response"
@@ -586,7 +638,7 @@ class TransactionTestCase(SimpleTestCase):
                msg_prefix + "Couldn't find '%s' in response" % text)

    def assertNotContains(self, response, text, status_code=200,
                          msg_prefix=''):
                          msg_prefix='', html=False):
        """
        Asserts that a response indicates that some content was retrieved
        successfully, (i.e., the HTTP status code was as expected), and that
@@ -606,7 +658,13 @@ class TransactionTestCase(SimpleTestCase):
            msg_prefix + "Couldn't retrieve content: Response code was %d"
            " (expected %d)" % (response.status_code, status_code))
        text = smart_str(text, response._charset)
        self.assertEqual(response.content.count(text), 0,
        content = response.content
        if html:
            content = assert_and_parse_html(self, content, None,
                u'Response\'s content is no valid html:')
            text = assert_and_parse_html(self, text, None,
                u'Second argument is no valid html:')
        self.assertEqual(content.count(text), 0,
            msg_prefix + "Response should not contain '%s'" % text)

    def assertFormError(self, response, form, field, errors, msg_prefix=''):
+94 −0
Original line number Diff line number Diff line
import HTMLParser as _HTMLParser


class HTMLParser(_HTMLParser.HTMLParser):
    """
    Patched version of stdlib's HTMLParser with patch from:
    http://bugs.python.org/issue670664

    The patch remembers which CDATA content element is currently open
    (``cdata_tag``) so that, while inside it, any end tag other than the
    matching one is passed on as plain data instead of being treated as
    markup.
    """
    def __init__(self):
        _HTMLParser.HTMLParser.__init__(self)
        # Lower-cased name of the CDATA content element currently being
        # parsed, or None when not in CDATA mode.
        self.cdata_tag = None

    def set_cdata_mode(self, tag):
        """Enters CDATA mode and remembers ``tag`` as the element to end it."""
        self.interesting = _HTMLParser.interesting_cdata
        self.cdata_tag = tag.lower()

    def clear_cdata_mode(self):
        """Leaves CDATA mode and forgets the remembered tag."""
        self.interesting = _HTMLParser.interesting_normal
        self.cdata_tag = None

    # Internal -- handle starttag, return end or -1 if not terminated
    def parse_starttag(self, i):
        # NOTE: ``self.__starttag_text`` is name-mangled using the name of
        # this class. Because this class is also called ``HTMLParser`` the
        # mangled name is ``_HTMLParser__starttag_text``; presumably that is
        # the attribute the stdlib base class uses as well -- keep the class
        # name in sync if that matters.
        self.__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self.__starttag_text = rawdata[i:endpos]

        # Now parse the data between i+1 and j into a tag and attrs
        attrs = []
        match = _HTMLParser.tagfind.match(rawdata, i + 1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = rawdata[i + 1:k].lower()

        while k < endpos:
            m = _HTMLParser.attrfind.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                # Attribute given without a value, e.g. <input checked>
                attrvalue = None
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                 attrvalue[:1] == '"' == attrvalue[-1:]:
                # Quoted value: drop the quotes and unescape the content.
                attrvalue = attrvalue[1:-1]
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            # NOTE(review): lineno and offset are computed here but not
            # passed to self.error() below.
            lineno, offset = self.getpos()
            if "\n" in self.__starttag_text:
                lineno = lineno + self.__starttag_text.count("\n")
                offset = len(self.__starttag_text) \
                         - self.__starttag_text.rfind("\n")
            else:
                offset = offset + len(self.__starttag_text)
            self.error("junk characters in start tag: %r"
                       % (rawdata[k:endpos][:20],))
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                self.set_cdata_mode(tag) # <--------------------------- Changed
        return endpos

    # Internal -- parse endtag, return end or -1 if incomplete
    def parse_endtag(self, i):
        rawdata = self.rawdata
        assert rawdata[i:i + 2] == "</", "unexpected call to parse_endtag"
        match = _HTMLParser.endendtag.search(rawdata, i + 1) # >
        if not match:
            return -1
        j = match.end()
        match = _HTMLParser.endtagfind.match(rawdata, i) # </ + tag + >
        if not match:
            # In CDATA mode a malformed end tag is just part of the content.
            if self.cdata_tag is not None: # *** add ***
                self.handle_data(rawdata[i:j]) # *** add ***
                return j # *** add ***
            self.error("bad end tag: %r" % (rawdata[i:j],))
        # --- changed start ---------------------------------------------------
        tag = match.group(1).strip()
        if self.cdata_tag is not None:
            # Only the end tag matching the open CDATA element ends CDATA
            # mode; every other end tag is handed on as data.
            if tag.lower() != self.cdata_tag:
                self.handle_data(rawdata[i:j])
                return j
        # --- changed end -----------------------------------------------------
        self.handle_endtag(tag.lower())
        self.clear_cdata_mode()
        return j
+15 −0
Original line number Diff line number Diff line
@@ -475,6 +475,21 @@ Time zone support is enabled by default in new projects created with
:djadmin:`startproject`. If you want to use this feature in an existing
project, read the :ref:`migration guide <time-zones-migration-guide>`.

HTML comparisons in tests
~~~~~~~~~~~~~~~~~~~~~~~~~

The :class:`~django.test.testcase.TestCase` base class now has some helpers to
compare HTML without tripping over irrelevant differences in whitespace,
argument quoting and ordering, and closing of self-closing tags. HTML can
either be compared directly with the new
:meth:`~django.test.testcase.TestCase.assertHTMLEqual` and
:meth:`~django.test.testcase.TestCase.assertHTMLNotEqual` assertions, or use
the ``html=True`` flag with
:meth:`~django.test.testcase.TestCase.assertContains` and
:meth:`~django.test.testcase.TestCase.assertNotContains` to test if the test
client's response contains a given HTML fragment. See the :ref:`assertion
documentation<assertions>` for more information.

Minor features
~~~~~~~~~~~~~~

+60 −2
Original line number Diff line number Diff line
@@ -1542,17 +1542,33 @@ your test suite.
        self.assertFieldOutput(EmailField, {'a@a.com': 'a@a.com'}, {'aaa': [u'Enter a valid e-mail address.']})


.. method:: TestCase.assertContains(response, text, count=None, status_code=200, msg_prefix='')
.. method:: TestCase.assertContains(response, text, count=None, status_code=200, msg_prefix='', html=False)

    Asserts that a ``Response`` instance produced the given ``status_code`` and
    that ``text`` appears in the content of the response. If ``count`` is
    provided, ``text`` must occur exactly ``count`` times in the response.

.. method:: TestCase.assertNotContains(response, text, status_code=200, msg_prefix='')
    .. versionadded:: 1.4

    Set ``html`` to ``True`` to handle ``text`` as HTML. The comparison with
    the response content will be based on HTML semantics instead of
    character-by-character equality. Whitespace is ignored in most cases,
    attribute ordering is not significant. See
    :func:`~TestCase.assertHTMLEqual` for more details.

.. method:: TestCase.assertNotContains(response, text, status_code=200, msg_prefix='', html=False)

    Asserts that a ``Response`` instance produced the given ``status_code`` and
    that ``text`` does not appear in the content of the response.

    .. versionadded:: 1.4

    Set ``html`` to ``True`` to handle ``text`` as HTML. The comparison with
    the response content will be based on HTML semantics instead of
    character-by-character equality. Whitespace is ignored in most cases,
    attribute ordering is not significant. See
    :func:`~TestCase.assertHTMLEqual` for more details.

.. method:: TestCase.assertFormError(response, form, field, errors, msg_prefix='')

    Asserts that a field on a form raises the provided list of errors when
@@ -1656,6 +1672,48 @@ your test suite.
            Person.objects.create(name="Aaron")
            Person.objects.create(name="Daniel")

.. method:: TestCase.assertHTMLEqual(html1, html2, msg=None)

    .. versionadded:: 1.4

    Asserts that the strings ``html1`` and ``html2`` are equal. The comparison
    is based on HTML semantics. The comparison takes the following things into
    account:

    * Whitespace before and after HTML tags is ignored
    * All types of whitespace are considered equivalent
    * All open tags are closed implicitly, i.e. when a surrounding tag is
      closed or the HTML document ends
    * Empty tags are equivalent to their self-closing version
    * The ordering of attributes of an HTML element is not significant
    * Attributes without an argument are equal to attributes that equal in
      name and value (see the examples)

    The following examples are valid tests and don't raise any
    ``AssertionError``::

        self.assertHTMLEqual('<p>Hello <b>world!</p>',
            '''<p>
                Hello   <b>world! </b>
            </p>''')
        self.assertHTMLEqual(
            '<input type="checkbox" checked="checked" id="id_accept_terms" />',
            "<input id='id_accept_terms' type='checkbox' checked>")

    ``html1`` and ``html2`` must be valid HTML. An ``AssertionError`` will be
    raised if one of them cannot be parsed.

.. method:: TestCase.assertHTMLNotEqual(html1, html2, msg=None)

    .. versionadded:: 1.4

    Asserts that the strings ``html1`` and ``html2`` are *not* equal. The
    comparison is based on HTML semantics. See
    :func:`~TestCase.assertHTMLEqual` for details.

    ``html1`` and ``html2`` must be valid HTML. An ``AssertionError`` will be
    raised if one of them cannot be parsed.


.. _topics-testing-email:

Loading