Commit c548c8d0 authored by Unai Zalakain's avatar Unai Zalakain Committed by Tim Graham
Browse files

Fixed #18456 -- Added path escaping to HttpRequest.get_full_path().

parent d3db878e
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -15,7 +15,9 @@ from django.core.files import uploadhandler
from django.http.multipartparser import MultiPartParser, MultiPartParserError
from django.utils import six
from django.utils.datastructures import MultiValueDict, ImmutableList
from django.utils.encoding import force_bytes, force_text, force_str, iri_to_uri
from django.utils.encoding import (
    force_bytes, force_text, force_str, escape_uri_path, iri_to_uri,
)
from django.utils.six.moves.urllib.parse import parse_qsl, urlencode, quote, urljoin, urlsplit


@@ -98,7 +100,7 @@ class HttpRequest(object):
        # RFC 3986 requires query string arguments to be in the ASCII range.
        # Rather than crash if this doesn't happen, we encode defensively.
        return '%s%s' % (
            self.path,
            escape_uri_path(self.path),
            ('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))) if self.META.get('QUERY_STRING', '') else ''
        )

+17 −0
Original line number Diff line number Diff line
@@ -226,6 +226,23 @@ def uri_to_iri(uri):
    return repercent_broken_unicode(iri).decode('utf-8')


def escape_uri_path(path):
    """
    Percent-encode the characters of the path portion of a Uniform Resource
    Identifier (URI) that are not safe to leave as they are.

    ``path`` is converted with ``force_bytes()`` and handed to ``quote()``;
    only the characters listed as safe below are exempt from escaping.
    """
    # RFC 2396 defines the following character classes (sections 2.2 and 2.3):
    #   reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
    #   unreserved  = alphanum | mark
    #   mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
    # The safe set is built from those classes by subtracting ";", "=" and
    # "?", following section 3.3 of RFC 2396.
    # "/" is deliberately kept safe (not subtracted and escaped) because the
    # whole path is being escaped here, not an individual path segment.
    safe_chars = b"/:@&+$,-_.!~*'()"
    raw_path = force_bytes(path)
    return quote(raw_path, safe=safe_chars)


def repercent_broken_unicode(path):
    """
    As per section 3.2 of RFC 3987, step three of converting a URI into an IRI,
+7 −0
Original line number Diff line number Diff line
@@ -298,6 +298,13 @@ The functions defined in this module share the following properties:

    Returns an ASCII string containing the encoded result.

.. function:: escape_uri_path(path)

    .. versionadded:: 1.8

    Escapes the unsafe characters from the path portion of a Uniform Resource
    Identifier (URI).

``django.utils.feedgenerator``
==============================

+4 −0
Original line number Diff line number Diff line
@@ -381,6 +381,10 @@ Requests and Responses
* ``WSGIRequestHandler`` now follows RFC 3987 in converting URI to IRI, using
  ``uri_to_iri()``.

* The :meth:`HttpRequest.get_full_path()
  <django.http.HttpRequest.get_full_path>` method now properly escapes unsafe
  characters in the path portion of the Uniform Resource Identifier (URI) it
  returns.

Tests
^^^^^

+13 −0
Original line number Diff line number Diff line
@@ -35,6 +35,19 @@ class RequestsTests(SimpleTestCase):
        # and FILES should be MultiValueDict
        self.assertEqual(request.FILES.getlist('foo'), [])

    def test_httprequest_full_path(self):
        # The path below contains ";", "?" and "=", which are expected to come
        # back percent-encoded (%3B, %3F, %3D), while ":" and "/" are expected
        # as-is. The query string is expected to be returned unchanged.
        awful_path = '/;some/?awful/=path/foo:bar/'
        req = HttpRequest()
        req.path = awful_path
        req.path_info = awful_path
        req.META['QUERY_STRING'] = ';some=query&+query=string'
        full_path = req.get_full_path()
        self.assertEqual(
            full_path,
            '/%3Bsome/%3Fawful/%3Dpath/foo:bar/?;some=query&+query=string'
        )

    def test_httprequest_full_path_with_query_string_and_fragment(self):
        # A "#" inside the path is expected to come back as "%23", whereas a
        # "#" inside the query string is expected to be left untouched.
        path_with_hash = '/foo#bar'
        req = HttpRequest()
        req.path = path_with_hash
        req.path_info = path_with_hash
        req.META['QUERY_STRING'] = 'baz#quux'
        full_path = req.get_full_path()
        self.assertEqual(full_path, '/foo%23bar?baz#quux')

    def test_httprequest_repr(self):
        request = HttpRequest()
        request.path = '/somepath/'
Loading