Commit 9f9fdc4b authored by Claude Paroz
Browse files

[1.6.x] Fixed #22996 -- Prevented crash with unencoded query string

Thanks Jorge Carleitao for the report and Aymeric Augustin, Tim Graham
for the reviews.
Backport of fa02120d from master.
parent c0e49ef7
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -134,7 +134,7 @@ class WSGIRequest(http.HttpRequest):
            # The WSGI spec says 'QUERY_STRING' may be absent.
            raw_query_string = self.environ.get('QUERY_STRING', str(''))
            if six.PY3:
                raw_query_string = raw_query_string.encode('iso-8859-1').decode('utf-8')
                raw_query_string = raw_query_string.encode('iso-8859-1')
            self._get = http.QueryDict(raw_query_string, encoding=self._encoding)
        return self._get

+6 −2
Original line number Diff line number Diff line
@@ -290,8 +290,12 @@ class QueryDict(MultiValueDict):
        self.encoding = encoding
        if six.PY3:
            if isinstance(query_string, bytes):
                # query_string contains URL-encoded data, a subset of ASCII.
                query_string = query_string.decode()
                # query_string normally contains URL-encoded data, a subset of ASCII.
                try:
                    query_string = query_string.decode(encoding)
                except UnicodeDecodeError:
                    # ... but some user agents are misbehaving :-(
                    query_string = query_string.decode('iso-8859-1')
            for key, value in parse_qsl(query_string or '',
                                        keep_blank_values=True,
                                        encoding=encoding):
+3 −0
Original line number Diff line number Diff line
@@ -40,3 +40,6 @@ Bugfixes
* Fixed JavaScript errors while editing multi-geometry objects in the OpenLayers
  widget (`#23137 <https://code.djangoproject.com/ticket/23137>`_,
  `#23293 <https://code.djangoproject.com/ticket/23293>`_).

* Prevented a crash on Python 3 with query strings containing unencoded
  non-ASCII characters (`#22996 <https://code.djangoproject.com/ticket/22996>`_).
+23 −7
Original line number Diff line number Diff line
@@ -42,14 +42,30 @@ class HandlerTests(TestCase):
        self.assertEqual(response.status_code, 400)

    def test_non_ascii_query_string(self):
        """Test that non-ASCII query strings are properly decoded (#20530)."""
        """
        Test that non-ASCII query strings are properly decoded (#20530, #22996).
        """
        environ = RequestFactory().get('/').environ
        raw_query_string = 'want=café'
        raw_query_strings = [
            b'want=caf%C3%A9', # This is the proper way to encode 'café'
            b'want=caf\xc3\xa9', # UA forgot to quote bytes
            b'want=caf%E9', # UA quoted, but not in UTF-8
            b'want=caf\xe9', # UA forgot to convert Latin-1 to UTF-8 and to quote (typical of MSIE)
        ]
        got = []
        for raw_query_string in raw_query_strings:
            if six.PY3:
            raw_query_string = raw_query_string.encode('utf-8').decode('iso-8859-1')
                # Simulate http.server.BaseHTTPRequestHandler.parse_request handling of raw request
                environ['QUERY_STRING'] = str(raw_query_string, 'iso-8859-1')
            else:
                environ['QUERY_STRING'] = raw_query_string
            request = WSGIRequest(environ)
        self.assertEqual(request.GET['want'], "café")
            got.append(request.GET['want'])
        if six.PY2:
            self.assertListEqual(got, ['café', 'café', 'caf\ufffd', 'caf\ufffd'])
        else:
            # On Python 3, %E9 is converted to the unicode replacement character by parse_qsl
            self.assertListEqual(got, ['café', 'café', 'caf\ufffd', 'café'])

    def test_non_ascii_cookie(self):
        """Test that non-ASCII cookies set in JavaScript are properly decoded (#20557)."""