This project is archived. Its data is read-only.

Commit bf1871d8 authored Nov 24, 2012 by Chris Khoo Committed by Claude Paroz Nov 24, 2012

Fixed #19237 -- Improved strip_tags utility

The previous pattern didn't properly address cases where '>'
was present inside quoted tag content.

parent be64dd35

django/utils/html.py

+2 −1

Original line number	Diff line number	Diff line
		@@ -33,6 +33,7 @@ link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
		html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
		hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
		trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
		strip_tags_re = re.compile(r'</?\S([^=]*=(\s*"[^"]*"|\s*\'[^\']*\'|\S*)|[^>])*?>', re.IGNORECASE)


		def escape(text):
		@@ -117,7 +118,7 @@ linebreaks = allow_lazy(linebreaks, six.text_type)

		def strip_tags(value):
		"""Returns the given HTML with all tags stripped."""
		return re.sub(r'<[^>]*?>', '', force_text(value))
		return strip_tags_re.sub('', force_text(value))
		strip_tags = allow_lazy(strip_tags)

		def remove_tags(html, tags):

tests/regressiontests/utils/html.py

+3 −0

Original line number	Diff line number	Diff line
		@@ -65,6 +65,9 @@ class TestUtilsHtml(unittest.TestCase):
		('<f', '<f'),
		('</fe', '</fe'),
		('<x>b<y>', 'b'),
		('a<p onclick="alert(\'<test>\')">b</p>c', 'abc'),
		('a<p a >b</p>c', 'abc'),
		('d<a:b c:d>e</p>f', 'def'),
		)
		for value, output in items:
		self.check_output(f, value, output)