Loading django/utils/html.py +27 −14 Original line number Diff line number Diff line Loading @@ -12,7 +12,7 @@ from django.utils.functional import allow_lazy from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS from django.utils.safestring import SafeData, mark_safe from django.utils import six from django.utils.six.moves.urllib.parse import quote, unquote, urlsplit, urlunsplit from django.utils.six.moves.urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit from django.utils.text import normalize_newlines from .html_parser import HTMLParser, HTMLParseError Loading Loading @@ -218,25 +218,38 @@ strip_entities = allow_lazy(strip_entities, six.text_type) def smart_urlquote(url): "Quotes a URL if it isn't already quoted." def unquote_quote(segment): segment = unquote(force_str(segment)) # Tilde is part of RFC3986 Unreserved Characters # http://tools.ietf.org/html/rfc3986#section-2.3 # See also http://bugs.python.org/issue16285 segment = quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + str('~')) return force_text(segment) # Handle IDN before quoting. try: scheme, netloc, path, query, fragment = urlsplit(url) except ValueError: # invalid IPv6 URL (normally square brackets in hostname part). return unquote_quote(url) try: netloc = netloc.encode('idna').decode('ascii') # IDN -> ACE except UnicodeError: # invalid domain part pass else: url = urlunsplit((scheme, netloc, path, query, fragment)) except ValueError: # invalid IPv6 URL (normally square brackets in hostname part). pass return unquote_quote(url) url = unquote(force_str(url)) # See http://bugs.python.org/issue2637 url = quote(url, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + str('~')) if query: # Separately unquoting key/value, so as to not mix querystring separators # included in query values. See #22267. query_parts = [(unquote(force_str(q[0])), unquote(force_str(q[1]))) for q in parse_qsl(query, keep_blank_values=True)] # urlencode will take care of quoting query = urlencode(query_parts) return force_text(url) path = unquote_quote(path) fragment = unquote_quote(fragment) return urlunsplit((scheme, netloc, path, query, fragment)) def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): """ Loading tests/utils_tests/test_html.py +5 −1 Original line number Diff line number Diff line Loading @@ -173,7 +173,11 @@ class TestUtilsHtml(TestCase): # Ensure that everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered safe as per RFC self.assertEqual(quote('http://example.com/path/öäü/'), 'http://example.com/path/%C3%B6%C3%A4%C3%BC/') self.assertEqual(quote('http://example.com/%C3%B6/ä/'), 'http://example.com/%C3%B6/%C3%A4/') self.assertEqual(quote('http://example.com/?x=1&y=2'), 'http://example.com/?x=1&y=2') self.assertEqual(quote('http://example.com/?x=1&y=2+3&z='), 'http://example.com/?x=1&y=2+3&z=') self.assertEqual(quote('http://example.com/?q=http://example.com/?x=1%26q=django'), 'http://example.com/?q=http%3A%2F%2Fexample.com%2F%3Fx%3D1%26q%3Ddjango') self.assertEqual(quote('http://example.com/?q=http%3A%2F%2Fexample.com%2F%3Fx%3D1%26q%3Ddjango'), 'http://example.com/?q=http%3A%2F%2Fexample.com%2F%3Fx%3D1%26q%3Ddjango') def test_conditional_escape(self): s = '<h1>interop</h1>' Loading Loading
django/utils/html.py +27 −14 Original line number Diff line number Diff line Loading @@ -12,7 +12,7 @@ from django.utils.functional import allow_lazy from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS from django.utils.safestring import SafeData, mark_safe from django.utils import six from django.utils.six.moves.urllib.parse import quote, unquote, urlsplit, urlunsplit from django.utils.six.moves.urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit from django.utils.text import normalize_newlines from .html_parser import HTMLParser, HTMLParseError Loading Loading @@ -218,25 +218,38 @@ strip_entities = allow_lazy(strip_entities, six.text_type) def smart_urlquote(url): "Quotes a URL if it isn't already quoted." def unquote_quote(segment): segment = unquote(force_str(segment)) # Tilde is part of RFC3986 Unreserved Characters # http://tools.ietf.org/html/rfc3986#section-2.3 # See also http://bugs.python.org/issue16285 segment = quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + str('~')) return force_text(segment) # Handle IDN before quoting. try: scheme, netloc, path, query, fragment = urlsplit(url) except ValueError: # invalid IPv6 URL (normally square brackets in hostname part). return unquote_quote(url) try: netloc = netloc.encode('idna').decode('ascii') # IDN -> ACE except UnicodeError: # invalid domain part pass else: url = urlunsplit((scheme, netloc, path, query, fragment)) except ValueError: # invalid IPv6 URL (normally square brackets in hostname part). pass return unquote_quote(url) url = unquote(force_str(url)) # See http://bugs.python.org/issue2637 url = quote(url, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + str('~')) if query: # Separately unquoting key/value, so as to not mix querystring separators # included in query values. See #22267. query_parts = [(unquote(force_str(q[0])), unquote(force_str(q[1]))) for q in parse_qsl(query, keep_blank_values=True)] # urlencode will take care of quoting query = urlencode(query_parts) return force_text(url) path = unquote_quote(path) fragment = unquote_quote(fragment) return urlunsplit((scheme, netloc, path, query, fragment)) def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): """ Loading
tests/utils_tests/test_html.py +5 −1 Original line number Diff line number Diff line Loading @@ -173,7 +173,11 @@ class TestUtilsHtml(TestCase): # Ensure that everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered safe as per RFC self.assertEqual(quote('http://example.com/path/öäü/'), 'http://example.com/path/%C3%B6%C3%A4%C3%BC/') self.assertEqual(quote('http://example.com/%C3%B6/ä/'), 'http://example.com/%C3%B6/%C3%A4/') self.assertEqual(quote('http://example.com/?x=1&y=2'), 'http://example.com/?x=1&y=2') self.assertEqual(quote('http://example.com/?x=1&y=2+3&z='), 'http://example.com/?x=1&y=2+3&z=') self.assertEqual(quote('http://example.com/?q=http://example.com/?x=1%26q=django'), 'http://example.com/?q=http%3A%2F%2Fexample.com%2F%3Fx%3D1%26q%3Ddjango') self.assertEqual(quote('http://example.com/?q=http%3A%2F%2Fexample.com%2F%3Fx%3D1%26q%3Ddjango'), 'http://example.com/?q=http%3A%2F%2Fexample.com%2F%3Fx%3D1%26q%3Ddjango') def test_conditional_escape(self): s = '<h1>interop</h1>' Loading