Commit 2e65d561 authored by Danilo Bargen, committed by Tim Graham
Browse files

Fixed #20003 -- Improved and extended URLValidator

This adds support for authentication data (`user:password`) in URLs,
IPv6 addresses, and unicode domains.

The test suite has been improved by adding test URLs from
http://mathiasbynens.be/demo/url-regex (with a few adjustments,
like allowing local and reserved IPs).

The previous URL validation regex failed this test suite on 13
occasions, so the validator was updated based on
https://gist.github.com/dperini/729294.
parent 6288fccf
Loading
Loading
Loading
Loading
+25 −6
Original line number Diff line number Diff line
@@ -66,14 +66,25 @@ class RegexValidator(object):

@deconstructible
class URLValidator(RegexValidator):
    ul = '\u00a1-\uffff'  # unicode letters range (must be a unicode string, not a raw string)

    # IP patterns
    ipv4_re = r'(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}'
    ipv6_re = r'\[[0-9a-f:\.]+\]'  # (simple regex, validated later)

    # Host patterns
    hostname_re = r'[a-z' + ul + r'0-9](?:[a-z' + ul + r'0-9-]*[a-z' + ul + r'0-9])?'
    domain_re = r'(?:\.[a-z' + ul + r'0-9]+(?:[a-z' + ul + r'0-9-]*[a-z' + ul + r'0-9]+)*)*'
    tld_re = r'\.[a-z' + ul + r']{2,}\.?'
    host_re = '(' + hostname_re + domain_re + tld_re + '|localhost)'

    regex = re.compile(
        r'^(?:[a-z0-9\.\-]*)://'  # scheme is validated separately
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}(?<!-)\.?)|'  # domain...
        r'localhost|'  # localhost...
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|'  # ...or ipv4
        r'\[?[A-F0-9]*:[A-F0-9:]+\]?)'  # ...or ipv6
        r'(?::\d+)?'  # optional port
        r'(?:/?|[/?]\S+)$', re.IGNORECASE)
        r'(?:\S+(?::\S*)?@)?'  # user:pass authentication
        r'(?:' + ipv4_re + '|' + ipv6_re + '|' + host_re + ')'
        r'(?::\d{2,5})?'  # port
        r'(?:[/?#][^\s]*)?'  # resource path
        r'$', re.IGNORECASE)
    message = _('Enter a valid URL.')
    schemes = ['http', 'https', 'ftp', 'ftps']

@@ -105,6 +116,14 @@ class URLValidator(RegexValidator):
            else:
                raise
        else:
            # Now verify IPv6 in the netloc part
            host_match = re.search(r'^\[(.+)\](?::\d{2,5})?$', urlsplit(value).netloc)
            if host_match:
                potential_ip = host_match.groups()[0]
                try:
                    validate_ipv6_address(potential_ip)
                except ValidationError:
                    raise ValidationError(self.message, code=self.code)
            url = value


+12 −3
Original line number Diff line number Diff line
@@ -149,9 +149,13 @@ to, or in lieu of custom ``field.clean()`` methods.
.. class:: URLValidator([schemes=None, regex=None, message=None, code=None])

    A :class:`RegexValidator` that ensures a value looks like a URL, and raises
    an error code of ``'invalid'`` if it doesn't. In addition to the optional
    arguments of its parent :class:`RegexValidator` class, ``URLValidator``
    accepts an extra optional attribute:
    an error code of ``'invalid'`` if it doesn't.

    Loopback addresses and reserved IP spaces are considered valid. Literal
    IPv6 addresses (:rfc:`2732`) and unicode domains are both supported.

    In addition to the optional arguments of its parent :class:`RegexValidator`
    class, ``URLValidator`` accepts an extra optional attribute:

    .. attribute:: schemes

@@ -165,6 +169,11 @@ to, or in lieu of custom ``field.clean()`` methods.

        The optional ``schemes`` attribute was added.

    .. versionchanged:: 1.8

        Support for IPv6 addresses, unicode domains, and URLs containing
        authentication data was added.

``validate_email``
------------------
.. data:: validate_email
+2 −1
Original line number Diff line number Diff line
@@ -606,7 +606,8 @@ Tests
Validators
^^^^^^^^^^

* ...
* :class:`~django.core.validators.URLValidator` now supports IPv6 addresses,
  unicode domains, and URLs containing authentication data.

Backwards incompatible changes in 1.8
=====================================
+1 −3
Original line number Diff line number Diff line
@@ -896,9 +896,7 @@ class FieldsTests(SimpleTestCase):
        """Test URLField correctly validates IPv6 (#18779)."""
        f = URLField()
        urls = (
            'http://::/',
            'http://6:21b4:92/',
            'http://[12:34:3a53]/',
            'http://[12:34::3a53]/',
            'http://[a34:9238::]:8080/',
        )
        for url in urls:
+36 −0
Original line number Diff line number Diff line
@@ -10,3 +10,39 @@ http://inv-.alid-.com
http://inv-.-alid.com
file://localhost/path
git://example.com/
http://.
http://..
http://../
http://?
http://??
http://??/
http://#
http://##
http://##/
http://foo.bar?q=Spaces should be encoded
//
//a
///a
///
http:///a
foo.com
rdar://1234
h://test
http:// shouldfail.com
:// should fail
http://foo.bar/foo(bar)baz quux
http://-error-.invalid/
http://-a.b.co
http://a.b-.co
http:/
http://
http://
http://1.1.1.1.1
http://123.123.123
http://3628126748
http://123
http://.www.foo.bar/
http://.www.foo.bar./
http://[::1:2::3]:8080/
http://[]
http://[]:8080
Loading