[1.5.x] Simplified smart_urlquote and added some basic tests. (41492f0f) · Commits · Dom Sekotill / django

django/utils/html.py

+6 −9

Original line number	Diff line number	Diff line
		@@ -5,13 +5,13 @@ from __future__ import unicode_literals
		import re
		import string
		try:
		from urllib.parse import quote, urlsplit, urlunsplit
		from urllib.parse import quote, unquote, urlsplit, urlunsplit
		except ImportError: # Python 2
		from urllib import quote
		from urllib import quote, unquote
		from urlparse import urlsplit, urlunsplit

		from django.utils.safestring import SafeData, mark_safe
		from django.utils.encoding import force_bytes, force_text
		from django.utils.encoding import force_text, force_str
		from django.utils.functional import allow_lazy
		from django.utils import six
		from django.utils.text import normalize_newlines
		@@ -24,7 +24,6 @@ WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;')]
		DOTS = ['&middot;', '*', '\u2022', '&#149;', '&bull;', '&#8226;']

		unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
		unquoted_percents_re = re.compile(r'%(?![0-9A-Fa-f]{2})')
		word_split_re = re.compile(r'(\s+)')
		simple_url_re = re.compile(r'^https?://\w', re.IGNORECASE)
		simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)$', re.IGNORECASE)
		@@ -158,11 +157,9 @@ def smart_urlquote(url):
		else:
		url = urlunsplit((scheme, netloc, path, query, fragment))

		# An URL is considered unquoted if it contains no % characters or
		# contains a % not followed by two hexadecimal digits. See #9655.
		if '%' not in url or unquoted_percents_re.search(url):
		url = unquote(force_str(url))
		# See http://bugs.python.org/issue2637
		url = quote(force_bytes(url), safe=b'!*\'();:@&=+$,/?#[]~')
		url = quote(url, safe=b'!*\'();:@&=+$,/?#[]~')

		return force_text(url)

tests/regressiontests/defaultfilters/tests.py

+4 −3

Original line number	Diff line number	Diff line
		@@ -246,9 +246,10 @@ class DefaultFiltersTests(TestCase):
		'<a href="https://google.com" rel="nofollow">https://google.com</a>')

		# Check urlize doesn't overquote already quoted urls - see #9655
		self.assertEqual(urlize('http://hi.baidu.com/%D6%D8%D0%C2%BF'),
		'<a href="http://hi.baidu.com/%D6%D8%D0%C2%BF" rel="nofollow">'
		'http://hi.baidu.com/%D6%D8%D0%C2%BF</a>')
		# The teststring is the urlquoted version of 'http://hi.baidu.com/重新开始'
		self.assertEqual(urlize('http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B'),
		'<a href="http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B" rel="nofollow">'
		'http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B</a>')
		self.assertEqual(urlize('www.mystore.com/30%OffCoupons!'),
		'<a href="http://www.mystore.com/30%25OffCoupons!" rel="nofollow">'
		'www.mystore.com/30%OffCoupons!</a>')

tests/regressiontests/utils/html.py

+11 −0

Original line number	Diff line number	Diff line
		# -*- coding: utf-8 -*-
		from __future__ import unicode_literals

		import unittest
		@@ -157,3 +158,13 @@ class TestUtilsHtml(unittest.TestCase):
		)
		for value, tags, output in items:
		self.assertEqual(f(value, tags), output)

		def test_smart_urlquote(self):
		# Checks html.smart_urlquote against fixed (input, expected output) pairs.
		# `quote` is only a local alias for the function under test.
		quote = html.smart_urlquote
		# Ensure that IDNs are properly quoted: the non-ASCII host is expected
		# back in its "xn--" form, and a non-ASCII path as %XX escapes.
		self.assertEqual(quote('http://öäü.com/'), 'http://xn--4ca9at.com/')
		self.assertEqual(quote('http://öäü.com/öäü/'), 'http://xn--4ca9at.com/%C3%B6%C3%A4%C3%BC/')
		# Ensure that everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered safe as per RFC
		self.assertEqual(quote('http://example.com/path/öäü/'), 'http://example.com/path/%C3%B6%C3%A4%C3%BC/')
		# Mixed input: the already-escaped %C3%B6 is expected unchanged (not
		# double-escaped) while the raw 'ä' next to it is expected escaped.
		self.assertEqual(quote('http://example.com/%C3%B6/ä/'), 'http://example.com/%C3%B6/%C3%A4/')
		# A query string using '?', '=' and '&' is expected to come back unchanged.
		self.assertEqual(quote('http://example.com/?x=1&y=2'), 'http://example.com/?x=1&y=2')