Commit fa02120d authored by Claude Paroz
Browse files

Fixed #22996 -- Prevented crash with unencoded query string

Thanks Jorge Carleitao for the report and Aymeric Augustin, Tim Graham
for the reviews.
parent 11d9cbe2
Loading
Loading
Loading
Loading
+11 −3
Original line number Diff line number Diff line
@@ -329,8 +329,12 @@ class QueryDict(MultiValueDict):
        self.encoding = encoding
        if six.PY3:
            if isinstance(query_string, bytes):
                # query_string contains URL-encoded data, a subset of ASCII.
                query_string = query_string.decode()
                # query_string normally contains URL-encoded data, a subset of ASCII.
                try:
                    query_string = query_string.decode(encoding)
                except UnicodeDecodeError:
                    # ... but some user agents are misbehaving :-(
                    query_string = query_string.decode('iso-8859-1')
            for key, value in parse_qsl(query_string or '',
                                        keep_blank_values=True,
                                        encoding=encoding):
@@ -338,8 +342,12 @@ class QueryDict(MultiValueDict):
        else:
            for key, value in parse_qsl(query_string or '',
                                        keep_blank_values=True):
                try:
                    value = value.decode(encoding)
                except UnicodeDecodeError:
                    value = value.decode('iso-8859-1')
                self.appendlist(force_text(key, encoding, errors='replace'),
                                force_text(value, encoding, errors='replace'))
                                value)
        self._mutable = mutable

    @property
+3 −0
Original line number Diff line number Diff line
@@ -40,3 +40,6 @@ Bugfixes
* Fixed JavaScript errors while editing multi-geometry objects in the OpenLayers
  widget (`#23137 <https://code.djangoproject.com/ticket/23137>`_,
  `#23293 <https://code.djangoproject.com/ticket/23293>`_).

* Prevented a crash on Python 3 with query strings containing unencoded
  non-ASCII characters (`#22996 <https://code.djangoproject.com/ticket/22996>`_).
+3 −0
Original line number Diff line number Diff line
@@ -1425,6 +1425,9 @@ Miscellaneous
  databases, use the :djadminopt:`--database` flag to get SQL for those
  models (previously they would always be included in the output).

* Decoding the query string from URLs now falls back to the ISO-8859-1 encoding
  when the input is not valid UTF-8.

.. _deprecated-features-1.7:

Features deprecated in 1.7
+23 −7
Original line number Diff line number Diff line
@@ -42,14 +42,30 @@ class HandlerTests(TestCase):
        self.assertEqual(response.status_code, 400)

    def test_non_ascii_query_string(self):
        """Test that non-ASCII query strings are properly decoded (#20530)."""
        """
        Test that non-ASCII query strings are properly decoded (#20530, #22996).
        """
        environ = RequestFactory().get('/').environ
        raw_query_string = 'want=café'
        raw_query_strings = [
            b'want=caf%C3%A9', # This is the proper way to encode 'café'
            b'want=caf\xc3\xa9', # UA forgot to quote bytes
            b'want=caf%E9', # UA quoted, but not in UTF-8
            b'want=caf\xe9', # UA forgot to convert Latin-1 to UTF-8 and to quote (typical of MSIE)
        ]
        got = []
        for raw_query_string in raw_query_strings:
            if six.PY3:
            raw_query_string = raw_query_string.encode('utf-8').decode('iso-8859-1')
                # Simulate http.server.BaseHTTPRequestHandler.parse_request handling of raw request
                environ['QUERY_STRING'] = str(raw_query_string, 'iso-8859-1')
            else:
                environ['QUERY_STRING'] = raw_query_string
            request = WSGIRequest(environ)
        self.assertEqual(request.GET['want'], "café")
            got.append(request.GET['want'])
        if six.PY2:
            self.assertListEqual(got, ['café', 'café', 'café', 'café'])
        else:
            # On Python 3, %E9 is converted to the unicode replacement character by parse_qsl
            self.assertListEqual(got, ['café', 'café', 'caf\ufffd', 'café'])

    def test_non_ascii_cookie(self):
        """Test that non-ASCII cookies set in JavaScript are properly decoded (#20557)."""
+3 −3
Original line number Diff line number Diff line
@@ -203,14 +203,14 @@ class QueryDictTests(unittest.TestCase):
        def test_invalid_input_encoding(self):
            """
            QueryDicts must be able to handle invalid input encoding (in this
            case, bad UTF-8 encoding).
            case, bad UTF-8 encoding), falling back to ISO-8859-1 decoding.

            This test doesn't apply under Python 3 because the URL is a string
            and not a bytestring.
            """
            q = QueryDict(str(b'foo=bar&foo=\xff'))
            self.assertEqual(q['foo'], '\ufffd')
            self.assertEqual(q.getlist('foo'), ['bar', '\ufffd'])
            self.assertEqual(q['foo'], '\xff')
            self.assertEqual(q.getlist('foo'), ['bar', '\xff'])

    def test_pickle(self):
        q = QueryDict()