Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and... (844a24bb) · Commits · Dom Sekotill / django

django/test/html.py

0 → 100644

+221 −0

Original line number	Diff line number	Diff line
		"""
		Comparing two html documents.
		"""
		import re
		from HTMLParser import HTMLParseError
		from django.utils.encoding import force_unicode
		from django.utils.htmlparser import HTMLParser


		# Raw string: ``\s`` is a regex escape, not a Python string escape, so it
		# must reach the regex engine untouched.
		WHITESPACE = re.compile(r'\s+')


		def normalize_whitespace(string):
		    """
		    Return ``string`` with every run of whitespace (spaces, tabs, newlines)
		    collapsed into a single space. Leading and trailing runs are collapsed
		    too, not removed.
		    """
		    return WHITESPACE.sub(' ', string)


		class Element(object):
		    """
		    A node in the simplified DOM tree that ``Parser`` builds.

		    ``name`` is the tag name, ``attributes`` is a sorted list of
		    ``(name, value)`` pairs (``value`` is ``None`` for an attribute written
		    without a value) and ``children`` is a list mixing text chunks with
		    nested ``Element`` instances.
		    """

		    def __init__(self, name, attributes):
		        self.name = name
		        # Sorted so that the order of attributes in the markup does not
		        # influence comparisons.
		        self.attributes = sorted(attributes)
		        self.children = []

		    def append(self, element):
		        """
		        Add a text chunk or a child element to this node.

		        Text is converted to unicode, whitespace-normalized and merged into
		        a directly preceding text child. Before a child element is added, a
		        preceding whitespace-only text child is dropped. Empty text is not
		        stored.
		        """
		        if isinstance(element, basestring):
		            element = force_unicode(element)
		            element = normalize_whitespace(element)
		            if self.children:
		                if isinstance(self.children[-1], basestring):
		                    self.children[-1] += element
		                    self.children[-1] = normalize_whitespace(self.children[-1])
		                    return
		        elif self.children:
		            # removing last children if it is only whitespace
		            # this can result in incorrect dom representations since
		            # whitespace between inline tags like <span> is significant
		            if isinstance(self.children[-1], basestring):
		                if self.children[-1].isspace():
		                    self.children.pop()
		        if element:
		            self.children.append(element)

		    def finalize(self):
		        """
		        Strip surrounding whitespace from all text children, drop trailing
		        text children that end up empty and finalize child elements
		        recursively.
		        """
		        children = self.children
		        # Trim from the end: a trailing text child that is only whitespace
		        # disappears entirely, which may expose another trailing text child.
		        while children and isinstance(children[-1], basestring):
		            children[-1] = children[-1].rstrip()
		            if children[-1]:
		                break
		            children.pop()

		        for i, child in enumerate(children):
		            if isinstance(child, basestring):
		                children[i] = child.strip()
		            elif hasattr(child, 'finalize'):
		                child.finalize()

		    def __eq__(self, element):
		        """
		        Two elements are equal if they have the same name, equivalent
		        attributes and equal children. Objects that do not look like an
		        element compare unequal.
		        """
		        for required in ('name', 'attributes', 'children'):
		            if not hasattr(element, required):
		                return False
		        if self.name != element.name:
		            return False
		        if len(self.attributes) != len(element.attributes):
		            return False
		        if self.attributes != element.attributes:
		            # attributes without a value is same as attribute with value that
		            # equals the attributes name:
		            # <input checked> == <input checked="checked">
		            pairs = zip(self.attributes, element.attributes)
		            for (attr, value), (other_attr, other_value) in pairs:
		                if value is None:
		                    value = attr
		                if other_value is None:
		                    other_value = other_attr
		                if attr != other_attr or value != other_value:
		                    return False
		        if self.children != element.children:
		            return False
		        return True

		    def __ne__(self, element):
		        return not self.__eq__(element)

		    # Equality is structural, but ``Parser.element_positions`` uses elements
		    # as dictionary keys, so hashing explicitly stays identity-based.
		    __hash__ = object.__hash__

		    def _count(self, element, count=True):
		        """
		        Count how often ``element`` occurs in this subtree.

		        If ``element`` is text, substring occurrences inside text children
		        are counted. Otherwise equal subtrees are counted; a subtree that
		        matches is not searched any further. With ``count=False`` the search
		        stops at the first hit and only the truthiness of the result is
		        meaningful.
		        """
		        if not isinstance(element, basestring):
		            if self == element:
		                return 1
		        i = 0
		        for child in self.children:
		            # child is text content and element is also text content, then
		            # make a simple "text" in "text"
		            if isinstance(child, basestring):
		                if isinstance(element, basestring):
		                    if count:
		                        i += child.count(element)
		                    elif element in child:
		                        return 1
		            else:
		                i += child._count(element, count=count)
		                if not count and i:
		                    return i
		        return i

		    def __contains__(self, element):
		        return self._count(element, count=False) > 0

		    def count(self, element):
		        """Return the number of occurrences of ``element`` in this subtree."""
		        return self._count(element, count=True)

		    def __getitem__(self, key):
		        return self.children[key]

		    def __unicode__(self):
		        """Render the element and its children as markup."""
		        output = u'<%s' % self.name
		        for key, value in self.attributes:
		            # Only ``None`` marks a valueless attribute. An explicitly empty
		            # value (``class=""``) must stay visible, because ``__eq__``
		            # treats it as different from the valueless form.
		            if value is not None:
		                output += u' %s="%s"' % (key, value)
		            else:
		                output += u' %s' % key
		        if self.children:
		            output += u'>\n'
		            output += u''.join(unicode(c) for c in self.children)
		            output += u'\n</%s>' % self.name
		        else:
		            output += u' />'
		        return output

		    def __repr__(self):
		        return unicode(self)


		class RootElement(Element):
		    """
		    Container for the top-level nodes of a parsed document. It has neither a
		    name nor attributes.
		    """

		    def __init__(self):
		        super(RootElement, self).__init__(None, ())

		    def __unicode__(self):
		        # The root contributes no markup of its own; only its children are
		        # rendered, one directly after the other.
		        rendered = [unicode(child) for child in self.children]
		        return u''.join(rendered)


		class Parser(HTMLParser):
		    """
		    HTML parser that builds a tree of ``Element`` objects below
		    ``self.root``.
		    """
		    # Tags that are never pushed onto ``open_tags``: they are treated as
		    # complete as soon as their start tag has been seen.
		    SELF_CLOSING_TAGS = ('br', 'hr', 'input', 'img', 'meta', 'spacer',
		        'link', 'frame', 'base', 'col')

		    def __init__(self):
		        HTMLParser.__init__(self)
		        self.root = RootElement()
		        # Stack of elements whose end tag has not been seen yet.
		        self.open_tags = []
		        # Maps each created element to the position of its start tag.
		        self.element_positions = {}

		    def error(self, msg):
		        """Raise ``HTMLParseError`` for ``msg`` at the current position."""
		        raise HTMLParseError(msg, self.getpos())

		    def format_position(self, position=None, element=None):
		        """
		        Return a human readable ``'Line X, Column Y'`` string.

		        The position is taken from ``position`` if given, else from the
		        recorded start of ``element`` if given, else from the parser's
		        current position.
		        """
		        if not position and element:
		            position = self.element_positions[element]
		        if position is None:
		            position = self.getpos()
		        if hasattr(position, 'lineno'):
		            position = position.lineno, position.offset
		        return 'Line %d, Column %d' % position

		    @property
		    def current(self):
		        """The innermost open element, or the root if nothing is open."""
		        if self.open_tags:
		            return self.open_tags[-1]
		        else:
		            return self.root

		    def handle_startendtag(self, tag, attrs):
		        self.handle_starttag(tag, attrs)
		        # Tags from SELF_CLOSING_TAGS were never pushed onto ``open_tags``,
		        # so there is nothing to close for them.
		        if tag not in self.SELF_CLOSING_TAGS:
		            self.handle_endtag(tag)

		    def handle_starttag(self, tag, attrs):
		        element = Element(tag, attrs)
		        self.current.append(element)
		        if tag not in self.SELF_CLOSING_TAGS:
		            self.open_tags.append(element)
		        self.element_positions[element] = self.getpos()

		    def handle_endtag(self, tag):
		        """
		        Close the innermost open element named ``tag``, implicitly closing
		        every element opened after it. Raises ``HTMLParseError`` if no open
		        element matches.
		        """
		        while True:
		            if not self.open_tags:
		                self.error("Unexpected end tag `%s` (%s)" % (
		                    tag, self.format_position()))
		            element = self.open_tags.pop()
		            if element.name == tag:
		                break

		    def handle_data(self, data):
		        self.current.append(data)

		    def handle_charref(self, name):
		        # ``name`` arrives without the leading ``#`` (e.g. '62' or 'x3E' for
		        # ``&#62;`` / ``&#x3E;``), so it has to be put back to reproduce the
		        # original reference.
		        self.current.append('&#%s;' % name)

		    def handle_entityref(self, name):
		        self.current.append('&%s;' % name)


		def parse_html(html):
		    """
		    Parse a string of valid HTML into a tree of Python objects that can be
		    compared with other parsed HTML for semantic equivalence. Purely
		    syntactical differences, such as the kind of quotes used around
		    attribute values, are ignored.
		    """
		    html_parser = Parser()
		    html_parser.feed(html)
		    html_parser.close()
		    root = html_parser.root
		    root.finalize()
		    # The artificial root is only kept when it is needed; a document that
		    # consists of exactly one element is represented by that element.
		    top_level = root.children
		    if len(top_level) == 1 and not isinstance(top_level[0], basestring):
		        return top_level[0]
		    return root

django/test/testcases.py

+62 −4

Original line number	Diff line number	Diff line
		from __future__ import with_statement

		import difflib
		import os
		import re
		import sys
		@@ -29,12 +30,14 @@ from django.forms.fields import CharField
		from django.http import QueryDict
		from django.test import _doctest as doctest
		from django.test.client import Client
		from django.test.html import HTMLParseError, parse_html
		from django.test.signals import template_rendered
		from django.test.utils import (get_warnings_state, restore_warnings_state,
		override_settings)
		from django.test.utils import ContextList
		from django.utils import simplejson, unittest as ut2
		from django.utils.encoding import smart_str, force_unicode
		from django.utils.unittest.util import safe_repr
		from django.views.static import serve

		__all__ = ('DocTestRunner', 'OutputChecker', 'TestCase', 'TransactionTestCase',
		@@ -78,6 +81,16 @@ def restore_transaction_methods():
		transaction.leave_transaction_management = real_leave_transaction_management
		transaction.managed = real_managed


		def assert_and_parse_html(self, html, user_msg, msg):
		try:
		dom = parse_html(html)
		except HTMLParseError, e:
		standardMsg = u'%s\n%s' % (msg, e.msg)
		self.fail(self._formatMessage(user_msg, standardMsg))
		return dom


		class OutputChecker(doctest.OutputChecker):
		def check_output(self, want, got, optionflags):
		"""
		@@ -396,6 +409,39 @@ class SimpleTestCase(ut2.TestCase):
		self.assertTrue(isinstance(fieldclass(field_args, *field_kwargs),
		fieldclass))

		def assertHTMLEqual(self, html1, html2, msg=None):
		    """
		    Fail unless ``html1`` and ``html2`` are semantically the same HTML.
		    Whitespace is ignored in most cases and the order of attributes is
		    not significant. Both arguments must be valid HTML.
		    """
		    dom1 = assert_and_parse_html(self, html1, msg,
		        u'First argument is not valid html:')
		    dom2 = assert_and_parse_html(self, html2, msg,
		        u'Second argument is not valid html:')

		    if dom1 != dom2:
		        summary = '%s != %s' % (
		            safe_repr(dom1, True), safe_repr(dom2, True))
		        # Line-by-line diff of both rendered trees, appended to the summary.
		        delta = difflib.ndiff(
		            unicode(dom1).splitlines(),
		            unicode(dom2).splitlines())
		        diff = '\n' + '\n'.join(delta)
		        self.fail(self._formatMessage(
		            msg, self._truncateMessage(summary, diff)))

		def assertHTMLNotEqual(self, html1, html2, msg=None):
		    """
		    Fail if ``html1`` and ``html2`` are semantically equivalent HTML. Both
		    arguments must be valid HTML.
		    """
		    dom1 = assert_and_parse_html(self, html1, msg,
		        u'First argument is not valid html:')
		    dom2 = assert_and_parse_html(self, html2, msg,
		        u'Second argument is not valid html:')

		    if dom1 == dom2:
		        left = safe_repr(dom1, True)
		        right = safe_repr(dom2, True)
		        self.fail(self._formatMessage(msg, '%s == %s' % (left, right)))


		class TransactionTestCase(SimpleTestCase):
		# The class we'll use for the test client self.client.
		@@ -554,7 +600,7 @@ class TransactionTestCase(SimpleTestCase):
		(url, expected_url))

		def assertContains(self, response, text, count=None, status_code=200,
		msg_prefix=''):
		msg_prefix='', html=False):
		"""
		Asserts that a response indicates that some content was retrieved
		successfully, (i.e., the HTTP status code was as expected), and that
		@@ -576,7 +622,13 @@ class TransactionTestCase(SimpleTestCase):
		msg_prefix + "Couldn't retrieve content: Response code was %d"
		" (expected %d)" % (response.status_code, status_code))
		text = smart_str(text, response._charset)
		real_count = response.content.count(text)
		content = response.content
		if html:
		content = assert_and_parse_html(self, content, None,
		u"Response's content is not valid html:")
		text = assert_and_parse_html(self, text, None,
		u"Second argument is not valid html:")
		real_count = content.count(text)
		if count is not None:
		self.assertEqual(real_count, count,
		msg_prefix + "Found %d instances of '%s' in response"
		@@ -586,7 +638,7 @@ class TransactionTestCase(SimpleTestCase):
		msg_prefix + "Couldn't find '%s' in response" % text)

		def assertNotContains(self, response, text, status_code=200,
		msg_prefix=''):
		msg_prefix='', html=False):
		"""
		Asserts that a response indicates that some content was retrieved
		successfully, (i.e., the HTTP status code was as expected), and that
		@@ -606,7 +658,13 @@ class TransactionTestCase(SimpleTestCase):
		msg_prefix + "Couldn't retrieve content: Response code was %d"
		" (expected %d)" % (response.status_code, status_code))
		text = smart_str(text, response._charset)
		self.assertEqual(response.content.count(text), 0,
		content = response.content
		if html:
		content = assert_and_parse_html(self, content, None,
		u'Response\'s content is no valid html:')
		text = assert_and_parse_html(self, text, None,
		u'Second argument is no valid html:')
		self.assertEqual(content.count(text), 0,
		msg_prefix + "Response should not contain '%s'" % text)

		def assertFormError(self, response, form, field, errors, msg_prefix=''):

django/utils/htmlparser.py

0 → 100644

+94 −0

Original line number	Diff line number	Diff line
		import HTMLParser as _HTMLParser


		class HTMLParser(_HTMLParser.HTMLParser):
		"""
		Patched version of stdlib's HTMLParser with patch from:
		http://bugs.python.org/issue670664
		"""
		# The patch remembers which tag switched the parser into CDATA mode
		# (``cdata_tag``) so that only the matching end tag leaves that mode; any
		# other end tag seen meanwhile is passed on as plain data.
		def __init__(self):
		_HTMLParser.HTMLParser.__init__(self)
		# Lower-cased name of the tag whose content is currently being read in
		# CDATA mode, or None when the parser is in normal mode.
		self.cdata_tag = None

		# Switch to the CDATA scanning pattern and remember the opening tag.
		def set_cdata_mode(self, tag):
		self.interesting = _HTMLParser.interesting_cdata
		self.cdata_tag = tag.lower()

		# Switch back to the normal scanning pattern and forget the CDATA tag.
		def clear_cdata_mode(self):
		self.interesting = _HTMLParser.interesting_normal
		self.cdata_tag = None

		# Internal -- handle starttag, return end or -1 if not terminated
		def parse_starttag(self, i):
		# NOTE(review): because this class is also named ``HTMLParser``, the
		# private name below mangles to ``_HTMLParser__starttag_text`` --
		# presumably the same attribute the stdlib base class uses; confirm
		# before renaming this class.
		self.__starttag_text = None
		endpos = self.check_for_whole_start_tag(i)
		if endpos < 0:
		return endpos
		rawdata = self.rawdata
		self.__starttag_text = rawdata[i:endpos]

		# Now parse the data between i+1 and j into a tag and attrs
		attrs = []
		match = _HTMLParser.tagfind.match(rawdata, i + 1)
		assert match, 'unexpected call to parse_starttag()'
		k = match.end()
		self.lasttag = tag = rawdata[i + 1:k].lower()

		# Collect (lower-cased name, value) pairs; the value is None for an
		# attribute without "=...", otherwise it is unquoted and unescaped.
		while k < endpos:
		m = _HTMLParser.attrfind.match(rawdata, k)
		if not m:
		break
		attrname, rest, attrvalue = m.group(1, 2, 3)
		if not rest:
		attrvalue = None
		elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
		attrvalue[:1] == '"' == attrvalue[-1:]:
		attrvalue = attrvalue[1:-1]
		attrvalue = self.unescape(attrvalue)
		attrs.append((attrname.lower(), attrvalue))
		k = m.end()

		# Whatever follows the attributes must be the tag terminator.
		end = rawdata[k:endpos].strip()
		if end not in (">", "/>"):
		# lineno/offset are computed here but not used again in this method.
		lineno, offset = self.getpos()
		if "\n" in self.__starttag_text:
		lineno = lineno + self.__starttag_text.count("\n")
		offset = len(self.__starttag_text) \
		- self.__starttag_text.rfind("\n")
		else:
		offset = offset + len(self.__starttag_text)
		# self.error() is expected to raise -- TODO confirm for the base class.
		self.error("junk characters in start tag: %r"
		% (rawdata[k:endpos][:20],))
		if end.endswith('/>'):
		# XHTML-style empty tag: <span attr="value" />
		self.handle_startendtag(tag, attrs)
		else:
		self.handle_starttag(tag, attrs)
		if tag in self.CDATA_CONTENT_ELEMENTS:
		# Changed by the patch: pass the tag so the matching end tag can be
		# recognised later.
		self.set_cdata_mode(tag) # <--------------------------- Changed
		return endpos

		# Internal -- parse endtag, return end or -1 if incomplete
		def parse_endtag(self, i):
		rawdata = self.rawdata
		assert rawdata[i:i + 2] == "</", "unexpected call to parse_endtag"
		match = _HTMLParser.endendtag.search(rawdata, i + 1) # >
		if not match:
		return -1
		j = match.end()
		match = _HTMLParser.endtagfind.match(rawdata, i) # </ + tag + >
		if not match:
		# Added by the patch: inside CDATA content a malformed end tag is
		# ordinary text, not an error.
		if self.cdata_tag is not None: # * add *
		self.handle_data(rawdata[i:j]) # * add *
		return j # * add *
		self.error("bad end tag: %r" % (rawdata[i:j],))
		# --- changed start ---------------------------------------------------
		# Inside CDATA content only the end tag matching ``cdata_tag`` closes
		# the element; any other end tag is handed to handle_data() verbatim.
		tag = match.group(1).strip()
		if self.cdata_tag is not None:
		if tag.lower() != self.cdata_tag:
		self.handle_data(rawdata[i:j])
		return j
		# --- changed end -----------------------------------------------------
		self.handle_endtag(tag.lower())
		self.clear_cdata_mode()
		return j

docs/releases/1.4.txt

+15 −0

Original line number	Diff line number	Diff line
		@@ -475,6 +475,21 @@ Time zone support is enabled by default in new projects created with
		:djadmin:`startproject`. If you want to use this feature in an existing
		project, read the :ref:`migration guide <time-zones-migration-guide>`.

		HTML comparisons in tests
		~~~~~~~~~~~~~~~~~~~~~~~~~

		The :class:`~django.test.testcase.TestCase` base class now has some helpers to
		compare HTML without tripping over irrelevant differences in whitespace,
		attribute quoting and ordering, and closing of self-closing tags. You can
		either compare HTML directly with the new
		:meth:`~django.test.testcase.TestCase.assertHTMLEqual` and
		:meth:`~django.test.testcase.TestCase.assertHTMLNotEqual` assertions, or use
		the ``html=True`` flag with
		:meth:`~django.test.testcase.TestCase.assertContains` and
		:meth:`~django.test.testcase.TestCase.assertNotContains` to test if the test
		client's response contains a given HTML fragment. See the :ref:`assertion
		documentation<assertions>` for more information.

		Minor features
		~~~~~~~~~~~~~~

docs/topics/testing.txt

+60 −2

Original line number	Diff line number	Diff line
		@@ -1542,17 +1542,33 @@ your test suite.
		self.assertFieldOutput(EmailField, {'a@a.com': 'a@a.com'}, {'aaa': [u'Enter a valid e-mail address.']})


		.. method:: TestCase.assertContains(response, text, count=None, status_code=200, msg_prefix='')
		.. method:: TestCase.assertContains(response, text, count=None, status_code=200, msg_prefix='', html=False)

		Asserts that a ``Response`` instance produced the given ``status_code`` and
		that ``text`` appears in the content of the response. If ``count`` is
		provided, ``text`` must occur exactly ``count`` times in the response.

		.. method:: TestCase.assertNotContains(response, text, status_code=200, msg_prefix='')
		.. versionadded:: 1.4

		Set ``html`` to ``True`` to handle ``text`` as HTML. The comparison with
		the response content will be based on HTML semantics instead of
		character-by-character equality. Whitespace is ignored in most cases and
		attribute ordering is not significant. See
		:meth:`~TestCase.assertHTMLEqual` for more details.

		.. method:: TestCase.assertNotContains(response, text, status_code=200, msg_prefix='', html=False)

		Asserts that a ``Response`` instance produced the given ``status_code`` and
		that ``text`` does not appear in the content of the response.

		.. versionadded:: 1.4

		Set ``html`` to ``True`` to handle ``text`` as HTML. The comparison with
		the response content will be based on HTML semantics instead of
		character-by-character equality. Whitespace is ignored in most cases and
		attribute ordering is not significant. See
		:meth:`~TestCase.assertHTMLEqual` for more details.

		.. method:: TestCase.assertFormError(response, form, field, errors, msg_prefix='')

		Asserts that a field on a form raises the provided list of errors when
		@@ -1656,6 +1672,48 @@ your test suite.
		Person.objects.create(name="Aaron")
		Person.objects.create(name="Daniel")

		.. method:: TestCase.assertHTMLEqual(html1, html2, msg=None)

		.. versionadded:: 1.4

		Asserts that the strings ``html1`` and ``html2`` are equal. The comparison
		is based on HTML semantics. The comparison takes the following things into
		account:

		* Whitespace before and after HTML tags is ignored
		* All types of whitespace are considered equivalent
		* All open tags are closed implicitly, i.e. when a surrounding tag is
		closed or the HTML document ends
		* Empty tags are equivalent to their self-closing version
		* The ordering of attributes of an HTML element is not significant
		* Attributes without a value are equal to attributes whose value equals
		the attribute's name (see the examples)

		The following examples are valid tests and don't raise any
		``AssertionError``::

		self.assertHTMLEqual('<p>Hello <b>world!</p>',
		'''<p>
		Hello <b>world! </b>
		</p>''')
		self.assertHTMLEqual(
		'<input type="checkbox" checked="checked" id="id_accept_terms" />',
		'<input id="id_accept_terms" type="checkbox" checked>')

		``html1`` and ``html2`` must be valid HTML. An ``AssertionError`` will be
		raised if one of them cannot be parsed.

		.. method:: TestCase.assertHTMLNotEqual(html1, html2, msg=None)

		.. versionadded:: 1.4

		Asserts that the strings ``html1`` and ``html2`` are not equal. The
		comparison is based on HTML semantics. See
		:meth:`~TestCase.assertHTMLEqual` for details.

		``html1`` and ``html2`` must be valid HTML. An ``AssertionError`` will be
		raised if one of them cannot be parsed.


		.. _topics-testing-email: