Commit 41492f0f authored by Florian Apolloner
Browse files

[1.5.x] Simplified smart_urlquote and added some basic tests.

Backport of b70c371f from master.
parent dd2a512f
Loading
Loading
Loading
Loading
+6 −9
Original line number Diff line number Diff line
@@ -5,13 +5,13 @@ from __future__ import unicode_literals
import re
import string
try:
    from urllib.parse import quote, urlsplit, urlunsplit
    from urllib.parse import quote, unquote, urlsplit, urlunsplit
except ImportError:     # Python 2
    from urllib import quote
    from urllib import quote, unquote
    from urlparse import urlsplit, urlunsplit

from django.utils.safestring import SafeData, mark_safe
from django.utils.encoding import force_bytes, force_text
from django.utils.encoding import force_text, force_str
from django.utils.functional import allow_lazy
from django.utils import six
from django.utils.text import normalize_newlines
@@ -24,7 +24,6 @@ WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;')]
DOTS = ['&middot;', '*', '\u2022', '&#149;', '&bull;', '&#8226;']

unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
unquoted_percents_re = re.compile(r'%(?![0-9A-Fa-f]{2})')
word_split_re = re.compile(r'(\s+)')
simple_url_re = re.compile(r'^https?://\w', re.IGNORECASE)
simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)$', re.IGNORECASE)
@@ -158,11 +157,9 @@ def smart_urlquote(url):
    else:
        url = urlunsplit((scheme, netloc, path, query, fragment))

    # An URL is considered unquoted if it contains no % characters or
    # contains a % not followed by two hexadecimal digits. See #9655.
    if '%' not in url or unquoted_percents_re.search(url):
    url = unquote(force_str(url))
    # See http://bugs.python.org/issue2637
        url = quote(force_bytes(url), safe=b'!*\'();:@&=+$,/?#[]~')
    url = quote(url, safe=b'!*\'();:@&=+$,/?#[]~')

    return force_text(url)

+4 −3
Original line number Diff line number Diff line
@@ -246,9 +246,10 @@ class DefaultFiltersTests(TestCase):
            '<a href="https://google.com" rel="nofollow">https://google.com</a>')

        # Check urlize doesn't overquote already quoted urls - see #9655
        self.assertEqual(urlize('http://hi.baidu.com/%D6%D8%D0%C2%BF'),
            '<a href="http://hi.baidu.com/%D6%D8%D0%C2%BF" rel="nofollow">'
            'http://hi.baidu.com/%D6%D8%D0%C2%BF</a>')
        # The teststring is the urlquoted version of 'http://hi.baidu.com/重新开始'
        self.assertEqual(urlize('http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B'),
            '<a href="http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B" rel="nofollow">'
            'http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B</a>')
        self.assertEqual(urlize('www.mystore.com/30%OffCoupons!'),
            '<a href="http://www.mystore.com/30%25OffCoupons!" rel="nofollow">'
            'www.mystore.com/30%OffCoupons!</a>')
+11 −0
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import unittest
@@ -157,3 +158,13 @@ class TestUtilsHtml(unittest.TestCase):
        )
        for value, tags, output in items:
            self.assertEqual(f(value, tags), output)

    def test_smart_urlquote(self):
        """Check smart_urlquote against a table of (input URL, expected URL) pairs."""
        cases = (
            # Internationalized domain names: the host is expected in its
            # ASCII "xn--" form, a non-ASCII path in percent-encoded UTF-8.
            ('http://öäü.com/', 'http://xn--4ca9at.com/'),
            ('http://öäü.com/öäü/', 'http://xn--4ca9at.com/%C3%B6%C3%A4%C3%BC/'),
            # Unsafe characters must come back quoted, while the characters
            # !*'();:@&=+$,/?#[]~ are treated as safe (as per RFC) and kept.
            ('http://example.com/path/öäü/', 'http://example.com/path/%C3%B6%C3%A4%C3%BC/'),
            # A URL mixing already-quoted and raw segments must not be
            # double-quoted.
            ('http://example.com/%C3%B6/ä/', 'http://example.com/%C3%B6/%C3%A4/'),
            # Query-string delimiters pass through unchanged.
            ('http://example.com/?x=1&y=2', 'http://example.com/?x=1&y=2'),
        )
        for url, expected in cases:
            self.assertEqual(html.smart_urlquote(url), expected)