Commit 6de6988f authored by Claude Paroz
Browse files

Fixed #5076 -- Properly decode POSTs with non-utf-8 payload encoding

Thanks daniel at blogg.se for the report and Aymeric Augustin for
his assistance on the patch.
parent 9741912a
Loading
Loading
Loading
Loading
+24 −0
Original line number Diff line number Diff line
from __future__ import unicode_literals

import codecs
import logging
import sys
from io import BytesIO
@@ -144,6 +145,14 @@ class WSGIRequest(http.HttpRequest):
        self.META['PATH_INFO'] = path_info
        self.META['SCRIPT_NAME'] = script_name
        self.method = environ['REQUEST_METHOD'].upper()
        _, content_params = self._parse_content_type(self.META.get('CONTENT_TYPE', ''))
        if 'charset' in content_params:
            try:
                codecs.lookup(content_params['charset'])
            except LookupError:
                pass
            else:
                self.encoding = content_params['charset']
        self._post_parse_error = False
        try:
            content_length = int(self.environ.get('CONTENT_LENGTH'))
@@ -155,6 +164,21 @@ class WSGIRequest(http.HttpRequest):
    def _is_secure(self):
        return 'wsgi.url_scheme' in self.environ and self.environ['wsgi.url_scheme'] == 'https'

    def _parse_content_type(self, ctype):
        """
        Media Types parsing according to RFC 2616, section 3.7.

        Returns the data type and parameters. For example:
        Input: "text/plain; charset=iso-8859-1"
        Output: ('text/plain', {'charset': 'iso-8859-1'})
        """
        content_type, _, params = ctype.partition(';')
        content_params = {}
        for parameter in params.split(';'):
            k, _, v = parameter.strip().partition('=')
            content_params[k] = v
        return content_type, content_params

    def _get_request(self):
        if not hasattr(self, '_request'):
            self._request = datastructures.MergeDict(self.POST, self.GET)
+15 −0
Original line number Diff line number Diff line
# -*- encoding: utf-8 -*-
from __future__ import unicode_literals

import time
@@ -352,6 +353,20 @@ class RequestsTests(unittest.TestCase):
        self.assertRaises(Exception, lambda: request.body)
        self.assertEqual(request.POST, {})

    def test_alternate_charset_POST(self):
        """
        A urlencoded POST whose payload is latin-1 encoded, sent with
        charset=iso-8859-1 in CONTENT_TYPE, exposes the decoded text value
        through request.POST.
        """
        from django.utils.http import urllib_parse
        # Encode the value as latin-1 bytes before urlencoding so the body is
        # deliberately not utf-8.
        latin1_value = 'España'.encode('latin-1')
        body = FakePayload(urllib_parse.urlencode({'key': latin1_value}))
        environ = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_LENGTH': len(body),
            'CONTENT_TYPE': 'application/x-www-form-urlencoded; charset=iso-8859-1',
            'wsgi.input': body,
        }
        request = WSGIRequest(environ)
        self.assertEqual(request.POST, {'key': ['España']})

    def test_body_after_POST_multipart(self):
        """
        Reading body after parsing multipart is not allowed