Loading django/utils/html.py +27 −4 Original line number Diff line number Diff line Loading @@ -118,7 +118,10 @@ linebreaks = allow_lazy(linebreaks, six.text_type) class MLStripper(HTMLParser): def __init__(self): if six.PY2: HTMLParser.__init__(self) else: HTMLParser.__init__(self, strict=False) self.reset() self.fed = [] Loading @@ -135,16 +138,36 @@ class MLStripper(HTMLParser): return ''.join(self.fed) def strip_tags(value): """Returns the given HTML with all tags stripped.""" def _strip_once(value): """ Internal tag stripping utility used by strip_tags. """ s = MLStripper() try: s.feed(value) s.close() except HTMLParseError: return value try: s.close() except (HTMLParseError, UnboundLocalError) as err: # UnboundLocalError because of http://bugs.python.org/issue17802 # on Python 3.2, triggered by strict=False mode of HTMLParser return s.get_data() + s.rawdata else: return s.get_data() def strip_tags(value): """Returns the given HTML with all tags stripped.""" while True: if not ('<' in value or '>' in value): return value new_value = _strip_once(value) if new_value == value: # _strip_once was not able to detect more tags return value else: value = new_value strip_tags = allow_lazy(strip_tags) Loading docs/ref/templates/builtins.txt +11 −1 Original line number Diff line number Diff line Loading @@ -1985,7 +1985,7 @@ If ``value`` is ``10``, the output will be ``1.000000E+01``. striptags ^^^^^^^^^ Strips all [X]HTML tags. Makes all possible efforts to strip all [X]HTML tags. For example:: Loading @@ -1994,6 +1994,16 @@ For example:: If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"``, the output will be ``"Joel is a slug"``. .. admonition:: No safety guarantee Note that ``striptags`` doesn't give any guarantee about its output being entirely HTML safe, particularly with non valid HTML input. So **NEVER** apply the ``safe`` filter to a ``striptags`` output. 
If you are looking for something more robust, you can use the ``bleach`` Python library, notably its `clean`_ method. .. _clean: http://bleach.readthedocs.org/en/latest/clean.html .. templatefilter:: time time Loading docs/ref/utils.txt +11 −5 Original line number Diff line number Diff line Loading @@ -595,17 +595,23 @@ escaping HTML. .. function:: strip_tags(value) Removes anything that looks like an html tag from the string, that is anything contained within ``<>``. Tries to remove anything that looks like an HTML tag from the string, that is anything contained within ``<>``. Absolutely NO guarantee is provided about the resulting string being entirely HTML safe. So NEVER mark safe the result of a ``strip_tags`` call without escaping it first, for example with :func:`~django.utils.html.escape`. For example:: strip_tags(value) If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"`` the return value will be ``"Joel is a slug"``. Note that ``strip_tags`` result may still contain unsafe HTML content, so you might use :func:`~django.utils.html.escape` to make it a safe string. the return value will be ``"Joel is a slug"``. If you are looking for a more robust solution, take a look at the `bleach`_ Python library. .. _bleach: https://pypi.python.org/pypi/bleach .. versionchanged:: 1.6 Loading tests/utils_tests/test_html.py +2 −0 Original line number Diff line number Diff line Loading @@ -80,6 +80,8 @@ class TestUtilsHtml(TestCase): ('a<p a >b</p>c', 'abc'), ('d<a:b c:d>e</p>f', 'def'), ('<strong>foo</strong><a href="http://example.com">bar</a>', 'foobar'), ('<sc<!-- -->ript>test<<!-- -->/script>', 'test'), ('<script>alert()</script>&h', 'alert()&h'), ) for value, output in items: self.check_output(f, value, output) Loading Loading
django/utils/html.py +27 −4 Original line number Diff line number Diff line Loading @@ -118,7 +118,10 @@ linebreaks = allow_lazy(linebreaks, six.text_type) class MLStripper(HTMLParser): def __init__(self): if six.PY2: HTMLParser.__init__(self) else: HTMLParser.__init__(self, strict=False) self.reset() self.fed = [] Loading @@ -135,16 +138,36 @@ class MLStripper(HTMLParser): return ''.join(self.fed) def strip_tags(value): """Returns the given HTML with all tags stripped.""" def _strip_once(value): """ Internal tag stripping utility used by strip_tags. """ s = MLStripper() try: s.feed(value) s.close() except HTMLParseError: return value try: s.close() except (HTMLParseError, UnboundLocalError) as err: # UnboundLocalError because of http://bugs.python.org/issue17802 # on Python 3.2, triggered by strict=False mode of HTMLParser return s.get_data() + s.rawdata else: return s.get_data() def strip_tags(value): """Returns the given HTML with all tags stripped.""" while True: if not ('<' in value or '>' in value): return value new_value = _strip_once(value) if new_value == value: # _strip_once was not able to detect more tags return value else: value = new_value strip_tags = allow_lazy(strip_tags) Loading
docs/ref/templates/builtins.txt +11 −1 Original line number Diff line number Diff line Loading @@ -1985,7 +1985,7 @@ If ``value`` is ``10``, the output will be ``1.000000E+01``. striptags ^^^^^^^^^ Strips all [X]HTML tags. Makes all possible efforts to strip all [X]HTML tags. For example:: Loading @@ -1994,6 +1994,16 @@ For example:: If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"``, the output will be ``"Joel is a slug"``. .. admonition:: No safety guarantee Note that ``striptags`` doesn't give any guarantee about its output being entirely HTML safe, particularly with non valid HTML input. So **NEVER** apply the ``safe`` filter to a ``striptags`` output. If you are looking for something more robust, you can use the ``bleach`` Python library, notably its `clean`_ method. .. _clean: http://bleach.readthedocs.org/en/latest/clean.html .. templatefilter:: time time Loading
docs/ref/utils.txt +11 −5 Original line number Diff line number Diff line Loading @@ -595,17 +595,23 @@ escaping HTML. .. function:: strip_tags(value) Removes anything that looks like an html tag from the string, that is anything contained within ``<>``. Tries to remove anything that looks like an HTML tag from the string, that is anything contained within ``<>``. Absolutely NO guarantee is provided about the resulting string being entirely HTML safe. So NEVER mark safe the result of a ``strip_tags`` call without escaping it first, for example with :func:`~django.utils.html.escape`. For example:: strip_tags(value) If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"`` the return value will be ``"Joel is a slug"``. Note that ``strip_tags`` result may still contain unsafe HTML content, so you might use :func:`~django.utils.html.escape` to make it a safe string. the return value will be ``"Joel is a slug"``. If you are looking for a more robust solution, take a look at the `bleach`_ Python library. .. _bleach: https://pypi.python.org/pypi/bleach .. versionchanged:: 1.6 Loading
tests/utils_tests/test_html.py +2 −0 Original line number Diff line number Diff line Loading @@ -80,6 +80,8 @@ class TestUtilsHtml(TestCase): ('a<p a >b</p>c', 'abc'), ('d<a:b c:d>e</p>f', 'def'), ('<strong>foo</strong><a href="http://example.com">bar</a>', 'foobar'), ('<sc<!-- -->ript>test<<!-- -->/script>', 'test'), ('<script>alert()</script>&h', 'alert()&h'), ) for value, output in items: self.check_output(f, value, output) Loading