Commit 929684d6 authored by Andre Cruz's avatar Andre Cruz Committed by Tim Graham
Browse files

Fixed #21231 -- Enforced a max size for GET/POST values read into memory.

Thanks Tom Christie for review.
parent 4065f429
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -285,6 +285,14 @@ FILE_UPLOAD_HANDLERS = [
# file system instead of into memory.
FILE_UPLOAD_MAX_MEMORY_SIZE = 2621440  # i.e. 2.5 MB

# Maximum size in bytes of request data (excluding file uploads) that will be
# read before a SuspiciousOperation (RequestDataTooBig) is raised.
DATA_UPLOAD_MAX_MEMORY_SIZE = 2621440  # i.e. 2.5 MB

# Maximum number of GET/POST parameters that will be read before a
# SuspiciousOperation (TooManyFieldsSent) is raised.
DATA_UPLOAD_MAX_NUMBER_FIELDS = 1000

# Directory in which upload streamed files will be temporarily saved. A value of
# `None` will make Django use the operating system's default temporary directory
# (i.e. "/tmp" on *nix systems).
+16 −0
Original line number Diff line number Diff line
@@ -53,6 +53,22 @@ class DisallowedRedirect(SuspiciousOperation):
    pass


class TooManyFieldsSent(SuspiciousOperation):
    """
    The number of fields in a POST request exceeded
    settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.
    """
    pass


class RequestDataTooBig(SuspiciousOperation):
    """
    The size of the request (excluding any file uploads) exceeded
    settings.DATA_UPLOAD_MAX_MEMORY_SIZE.
    """
    pass


class PermissionDenied(Exception):
    """The user did not have permission to do that"""
    pass
+34 −3
Original line number Diff line number Diff line
@@ -12,7 +12,9 @@ import cgi
import sys

from django.conf import settings
from django.core.exceptions import SuspiciousMultipartForm
from django.core.exceptions import (
    RequestDataTooBig, SuspiciousMultipartForm, TooManyFieldsSent,
)
from django.core.files.uploadhandler import (
    SkipFile, StopFutureHandlers, StopUpload,
)
@@ -145,6 +147,13 @@ class MultiPartParser(object):
        old_field_name = None
        counters = [0] * len(handlers)

        # Number of bytes that have been read.
        num_bytes_read = 0
        # To count the number of keys in the request.
        num_post_keys = 0
        # To limit the amount of data read from the request.
        read_size = None

        try:
            for item_type, meta_data, field_stream in Parser(stream, self._boundary):
                if old_field_name:
@@ -166,15 +175,37 @@ class MultiPartParser(object):
                field_name = force_text(field_name, encoding, errors='replace')

                if item_type == FIELD:
                    # Avoid storing more than DATA_UPLOAD_MAX_NUMBER_FIELDS.
                    num_post_keys += 1
                    if (settings.DATA_UPLOAD_MAX_NUMBER_FIELDS is not None and
                            settings.DATA_UPLOAD_MAX_NUMBER_FIELDS < num_post_keys):
                        raise TooManyFieldsSent(
                            'The number of GET/POST parameters exceeded '
                            'settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.'
                        )

                    # Avoid reading more than DATA_UPLOAD_MAX_MEMORY_SIZE.
                    if settings.DATA_UPLOAD_MAX_MEMORY_SIZE is not None:
                        read_size = settings.DATA_UPLOAD_MAX_MEMORY_SIZE - num_bytes_read

                    # This is a post field, we can just set it in the post
                    if transfer_encoding == 'base64':
                        raw_data = field_stream.read()
                        raw_data = field_stream.read(size=read_size)
                        num_bytes_read += len(raw_data)
                        try:
                            data = base64.b64decode(raw_data)
                        except _BASE64_DECODE_ERROR:
                            data = raw_data
                    else:
                        data = field_stream.read()
                        data = field_stream.read(size=read_size)
                        num_bytes_read += len(data)

                    # Add two here to make the check consistent with the
                    # x-www-form-urlencoded check that includes '&='.
                    num_bytes_read += len(field_name) + 2
                    if (settings.DATA_UPLOAD_MAX_MEMORY_SIZE is not None and
                            num_bytes_read > settings.DATA_UPLOAD_MAX_MEMORY_SIZE):
                        raise RequestDataTooBig('Request body exceeded settings.DATA_UPLOAD_MAX_MEMORY_SIZE.')

                    self._post.appendlist(field_name,
                                          force_text(data, encoding, errors='replace'))
+19 −8
Original line number Diff line number Diff line
@@ -8,7 +8,9 @@ from itertools import chain

from django.conf import settings
from django.core import signing
from django.core.exceptions import DisallowedHost, ImproperlyConfigured
from django.core.exceptions import (
    DisallowedHost, ImproperlyConfigured, RequestDataTooBig,
)
from django.core.files import uploadhandler
from django.http.multipartparser import MultiPartParser, MultiPartParserError
from django.utils import six
@@ -16,9 +18,9 @@ from django.utils.datastructures import ImmutableList, MultiValueDict
from django.utils.encoding import (
    escape_uri_path, force_bytes, force_str, force_text, iri_to_uri,
)
from django.utils.http import is_same_domain
from django.utils.http import is_same_domain, limited_parse_qsl
from django.utils.six.moves.urllib.parse import (
    parse_qsl, quote, urlencode, urljoin, urlsplit,
    quote, urlencode, urljoin, urlsplit,
)

RAISE_ERROR = object()
@@ -259,6 +261,12 @@ class HttpRequest(object):
        if not hasattr(self, '_body'):
            if self._read_started:
                raise RawPostDataException("You cannot access body after reading from request's data stream")

            # Limit the maximum request data size that will be handled in-memory.
            if (settings.DATA_UPLOAD_MAX_MEMORY_SIZE is not None and
                    int(self.META.get('CONTENT_LENGTH', 0)) > settings.DATA_UPLOAD_MAX_MEMORY_SIZE):
                raise RequestDataTooBig('Request body exceeded settings.DATA_UPLOAD_MAX_MEMORY_SIZE.')

            try:
                self._body = self.read()
            except IOError as e:
@@ -368,6 +376,12 @@ class QueryDict(MultiValueDict):
        if not encoding:
            encoding = settings.DEFAULT_CHARSET
        self.encoding = encoding
        query_string = query_string or ''
        parse_qsl_kwargs = {
            'keep_blank_values': True,
            'fields_limit': settings.DATA_UPLOAD_MAX_NUMBER_FIELDS,
            'encoding': encoding,
        }
        if six.PY3:
            if isinstance(query_string, bytes):
                # query_string normally contains URL-encoded data, a subset of ASCII.
@@ -376,13 +390,10 @@ class QueryDict(MultiValueDict):
                except UnicodeDecodeError:
                    # ... but some user agents are misbehaving :-(
                    query_string = query_string.decode('iso-8859-1')
            for key, value in parse_qsl(query_string or '',
                                        keep_blank_values=True,
                                        encoding=encoding):
            for key, value in limited_parse_qsl(query_string, **parse_qsl_kwargs):
                self.appendlist(key, value)
        else:
            for key, value in parse_qsl(query_string or '',
                                        keep_blank_values=True):
            for key, value in limited_parse_qsl(query_string, **parse_qsl_kwargs):
                try:
                    value = value.decode(encoding)
                except UnicodeDecodeError:
+60 −0
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@ import unicodedata
from binascii import Error as BinasciiError
from email.utils import formatdate

from django.core.exceptions import TooManyFieldsSent
from django.utils import six
from django.utils.datastructures import MultiValueDict
from django.utils.encoding import force_bytes, force_str, force_text
@@ -34,6 +35,8 @@ ASCTIME_DATE = re.compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y))
RFC3986_GENDELIMS = str(":/?#[]@")
RFC3986_SUBDELIMS = str("!$&'()*+,;=")

FIELDS_MATCH = re.compile('[&;]')


@keep_lazy_text
def urlquote(url, safe='/'):
@@ -314,3 +317,60 @@ def _is_safe_url(url, host):
        return False
    return ((not url_info.netloc or url_info.netloc == host) and
            (not url_info.scheme or url_info.scheme in ['http', 'https']))


def limited_parse_qsl(qs, keep_blank_values=False, encoding='utf-8',
                      errors='replace', fields_limit=None):
    """
    Return a list of key/value tuples parsed from query string.

    Copied from urlparse with an additional "fields_limit" argument.
    Copyright (C) 2013 Python Software Foundation (see LICENSE.python).

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings. A
        true value indicates that blanks should be retained as blank
        strings. The default false value indicates that blank values
        are to be ignored and treated as if they were  not included.

    encoding and errors: specify how to decode percent-encoded sequences
        into Unicode characters, as accepted by the bytes.decode() method.

    fields_limit: maximum number of fields parsed or an exception
        is raised. None means no limit and is the default.
    """
    if fields_limit:
        pairs = FIELDS_MATCH.split(qs, fields_limit)
        if len(pairs) > fields_limit:
            raise TooManyFieldsSent(
                'The number of GET/POST parameters exceeded '
                'settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.'
            )
    else:
        pairs = FIELDS_MATCH.split(qs)
    r = []
    for name_value in pairs:
        if not name_value:
            continue
        nv = name_value.split(str('='), 1)
        if len(nv) != 2:
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            if six.PY3:
                name = nv[0].replace('+', ' ')
                name = unquote(name, encoding=encoding, errors=errors)
                value = nv[1].replace('+', ' ')
                value = unquote(value, encoding=encoding, errors=errors)
            else:
                name = unquote(nv[0].replace(b'+', b' '))
                value = unquote(nv[1].replace(b'+', b' '))
            r.append((name, value))
    return r
Loading