Commit 72ad014b authored by Claude Paroz
Browse files

[1.7.x] Fixed #22996 -- Prevented crash with unencoded query string

Thanks Jorge Carleitao for the report and Aymeric Augustin, Tim Graham
for the reviews.
Backport of fa02120d from master.
parent 6e5e2b0e
Loading
Loading
Loading
Loading
+11 −3
Original line number Diff line number Diff line
@@ -313,8 +313,12 @@ class QueryDict(MultiValueDict):
        self.encoding = encoding
        if six.PY3:
            if isinstance(query_string, bytes):
                # query_string contains URL-encoded data, a subset of ASCII.
                query_string = query_string.decode()
                # query_string normally contains URL-encoded data, a subset of ASCII.
                try:
                    query_string = query_string.decode(encoding)
                except UnicodeDecodeError:
                    # ... but some user agents are misbehaving :-(
                    query_string = query_string.decode('iso-8859-1')
            for key, value in parse_qsl(query_string or '',
                                        keep_blank_values=True,
                                        encoding=encoding):
@@ -322,8 +326,12 @@ class QueryDict(MultiValueDict):
        else:
            for key, value in parse_qsl(query_string or '',
                                        keep_blank_values=True):
                try:
                    value = value.decode(encoding)
                except UnicodeDecodeError:
                    value = value.decode('iso-8859-1')
                self.appendlist(force_text(key, encoding, errors='replace'),
                                force_text(value, encoding, errors='replace'))
                                value)
        self._mutable = mutable

    @property
+3 −0
Original line number Diff line number Diff line
@@ -40,3 +40,6 @@ Bugfixes
* Fixed JavaScript errors while editing multi-geometry objects in the OpenLayers
  widget (`#23137 <https://code.djangoproject.com/ticket/23137>`_,
  `#23293 <https://code.djangoproject.com/ticket/23293>`_).

* Prevented a crash on Python 3 with query strings containing unencoded
  non-ASCII characters (`#22996 <https://code.djangoproject.com/ticket/22996>`_).
+3 −0
Original line number Diff line number Diff line
@@ -1425,6 +1425,9 @@ Miscellaneous
  databases, use the :djadminopt:`--database` flag to get SQL for those
  models (previously they would always be included in the output).

* Decoding the query string from URLs now falls back to the ISO-8859-1 encoding
  when the input is not valid UTF-8.

.. _deprecated-features-1.7:

Features deprecated in 1.7
+23 −7
Original line number Diff line number Diff line
@@ -42,14 +42,30 @@ class HandlerTests(TestCase):
        self.assertEqual(response.status_code, 400)

    def test_non_ascii_query_string(self):
        """Test that non-ASCII query strings are properly decoded (#20530)."""
        """
        Test that non-ASCII query strings are properly decoded (#20530, #22996).
        """
        environ = RequestFactory().get('/').environ
        raw_query_string = 'want=café'
        raw_query_strings = [
            b'want=caf%C3%A9', # This is the proper way to encode 'café'
            b'want=caf\xc3\xa9', # UA forgot to quote bytes
            b'want=caf%E9', # UA quoted, but not in UTF-8
            b'want=caf\xe9', # UA forgot to convert Latin-1 to UTF-8 and to quote (typical of MSIE)
        ]
        got = []
        for raw_query_string in raw_query_strings:
            if six.PY3:
            raw_query_string = raw_query_string.encode('utf-8').decode('iso-8859-1')
                # Simulate http.server.BaseHTTPRequestHandler.parse_request handling of raw request
                environ['QUERY_STRING'] = str(raw_query_string, 'iso-8859-1')
            else:
                environ['QUERY_STRING'] = raw_query_string
            request = WSGIRequest(environ)
        self.assertEqual(request.GET['want'], "café")
            got.append(request.GET['want'])
        if six.PY2:
            self.assertListEqual(got, ['café', 'café', 'café', 'café'])
        else:
            # On Python 3, %E9 is converted to the unicode replacement character by parse_qsl
            self.assertListEqual(got, ['café', 'café', 'caf\ufffd', 'café'])

    def test_non_ascii_cookie(self):
        """Test that non-ASCII cookies set in JavaScript are properly decoded (#20557)."""
+3 −3
Original line number Diff line number Diff line
@@ -202,14 +202,14 @@ class QueryDictTests(unittest.TestCase):
        def test_invalid_input_encoding(self):
            """
            QueryDicts must be able to handle invalid input encoding (in this
            case, bad UTF-8 encoding).
            case, bad UTF-8 encoding), falling back to ISO-8859-1 decoding.

            This test doesn't apply under Python 3 because the URL is a string
            and not a bytestring.
            """
            q = QueryDict(str(b'foo=bar&foo=\xff'))
            self.assertEqual(q['foo'], '\ufffd')
            self.assertEqual(q.getlist('foo'), ['bar', '\ufffd'])
            self.assertEqual(q['foo'], '\xff')
            self.assertEqual(q.getlist('foo'), ['bar', '\xff'])

    def test_pickle(self):
        q = QueryDict(str(''))