Commit 5112e65e authored by Shai Berger
Browse files

Fixed #20869 -- made CSRF tokens change every request by salt-encrypting them

Note that the cookie is not changed every request, just the token retrieved
by the `get_token()` method (used also by the `{% csrf_token %}` tag).

While at it, made token validation strict: Where, before, any length was
accepted and non-ASCII chars were ignored, we now treat anything other than
`[A-Za-z0-9]{64}` as invalid (except for 32-char tokens, which, for
backwards-compatibility, are accepted and replaced by 64-char ones).

Thanks Trac user patrys for reporting, github user adambrenecki
for initial patch, Tim Graham for help, and Curtis Maloney,
Collin Anderson, Florian Apolloner, Markus Holtermann & Jon Dufresne
for reviews.
parent 6d9c5d46
Loading
Loading
Loading
Loading
+84 −23
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@ from __future__ import unicode_literals

import logging
import re
import string

from django.conf import settings
from django.urls import get_callable
@@ -16,8 +17,10 @@ from django.utils.crypto import constant_time_compare, get_random_string
from django.utils.deprecation import MiddlewareMixin
from django.utils.encoding import force_text
from django.utils.http import is_same_domain
from django.utils.six.moves import zip
from django.utils.six.moves.urllib.parse import urlparse


logger = logging.getLogger('django.request')

REASON_NO_REFERER = "Referer checking failed - no Referer."
@@ -27,7 +30,9 @@ REASON_BAD_TOKEN = "CSRF token missing or incorrect."
REASON_MALFORMED_REFERER = "Referer checking failed - Referer is malformed."
REASON_INSECURE_REFERER = "Referer checking failed - Referer is insecure while host is secure."

# Length of the random key returned by _get_new_csrf_key().
CSRF_KEY_LENGTH = 32
# Length of the random strings returned by _get_new_csrf_string(); the same
# length is used for the secret and for the salt that masks it.
CSRF_SECRET_LENGTH = 32
# A full token is a salt followed by the masked secret, each of
# CSRF_SECRET_LENGTH characters.
CSRF_TOKEN_LENGTH = 2 * CSRF_SECRET_LENGTH
# Alphabet used for secrets, salts and tokens: ASCII letters and digits.
CSRF_ALLOWED_CHARS = string.ascii_letters + string.digits


def _get_failure_view():
@@ -37,8 +42,38 @@ def _get_failure_view():
    return get_callable(settings.CSRF_FAILURE_VIEW)


def _get_new_csrf_key():
    """
    Return a random string of CSRF_KEY_LENGTH characters obtained from
    get_random_string().
    """
    return get_random_string(CSRF_KEY_LENGTH)
def _get_new_csrf_string():
    """
    Return a random string of CSRF_SECRET_LENGTH characters, each drawn from
    CSRF_ALLOWED_CHARS.
    """
    length, alphabet = CSRF_SECRET_LENGTH, CSRF_ALLOWED_CHARS
    return get_random_string(length, allowed_chars=alphabet)


def _salt_cipher_secret(secret):
    """
    Turn a secret (expected to consist only of CSRF_ALLOWED_CHARS) into a
    token: draw a fresh random salt, shift each character of the secret by the
    alphabet position of the matching salt character (wrapping around the end
    of the alphabet), and return the salt followed by the shifted characters.
    """
    alphabet = CSRF_ALLOWED_CHARS
    modulus = len(alphabet)
    salt = _get_new_csrf_string()
    # Positions are looked up lazily, pair by pair, as zip() consumes them.
    secret_positions = (alphabet.index(char) for char in secret)
    salt_positions = (alphabet.index(char) for char in salt)
    masked = []
    for secret_pos, salt_pos in zip(secret_positions, salt_positions):
        masked.append(alphabet[(secret_pos + salt_pos) % modulus])
    return salt + ''.join(masked)


def _unsalt_cipher_token(token):
    """
    Recover the secret carried by a token. The token is expected to consist
    only of CSRF_ALLOWED_CHARS and to be CSRF_TOKEN_LENGTH long: its first
    CSRF_SECRET_LENGTH characters are the salt, and the remainder is the
    secret shifted by that salt.
    """
    alphabet = CSRF_ALLOWED_CHARS
    salt, masked = token[:CSRF_SECRET_LENGTH], token[CSRF_SECRET_LENGTH:]
    # Positions are looked up lazily, pair by pair, as zip() consumes them.
    masked_positions = (alphabet.index(char) for char in masked)
    salt_positions = (alphabet.index(char) for char in salt)
    recovered = []
    for masked_pos, salt_pos in zip(masked_positions, salt_positions):
        # The difference may be negative; a negative index counts from the
        # end of the alphabet, which undoes the wrap-around applied when
        # the secret was salted.
        recovered.append(alphabet[masked_pos - salt_pos])
    return ''.join(recovered)


def _get_new_csrf_token():
    """
    Return a brand-new token: a freshly generated random secret, salted and
    masked by _salt_cipher_secret().
    """
    fresh_secret = _get_new_csrf_string()
    return _salt_cipher_secret(fresh_secret)


def get_token(request):
@@ -52,9 +87,12 @@ def get_token(request):
    function lazily, as is done by the csrf context processor.
    """
    if "CSRF_COOKIE" not in request.META:
        request.META["CSRF_COOKIE"] = _get_new_csrf_key()
        csrf_secret = _get_new_csrf_string()
        request.META["CSRF_COOKIE"] = _salt_cipher_secret(csrf_secret)
    else:
        csrf_secret = _unsalt_cipher_token(request.META["CSRF_COOKIE"])
    request.META["CSRF_COOKIE_USED"] = True
    return request.META["CSRF_COOKIE"]
    return _salt_cipher_secret(csrf_secret)


def rotate_token(request):
@@ -64,19 +102,35 @@ def rotate_token(request):
    """
    request.META.update({
        "CSRF_COOKIE_USED": True,
        "CSRF_COOKIE": _get_new_csrf_key(),
        "CSRF_COOKIE": _get_new_csrf_token(),
    })
    request.csrf_cookie_needs_reset = True


def _sanitize_token(token):
    # NOTE(review): this listing comes from a rendered diff without +/-
    # markers. The statements down to the "Allow only ASCII alphanumerics"
    # comment look like the replaced implementation, and the rest like its
    # replacement -- confirm against the actual module before relying on it.
    # Allow only alphanum
    if len(token) > CSRF_KEY_LENGTH:
        return _get_new_csrf_key()
    token = re.sub('[^a-zA-Z0-9]+', '', force_text(token))
    if token == "":
        # In case the cookie has been truncated to nothing at some point.
        return _get_new_csrf_key()
    # Allow only ASCII alphanumerics
    if re.search('[^a-zA-Z0-9]', force_text(token)):
        # Any character outside [a-zA-Z0-9] invalidates the value; hand back
        # a freshly generated token instead.
        return _get_new_csrf_token()
    elif len(token) == CSRF_TOKEN_LENGTH:
        # Already a full-length token: keep it unchanged.
        return token
    elif len(token) == CSRF_SECRET_LENGTH:
        # Older Django versions set cookies to values of CSRF_SECRET_LENGTH
        # alphanumeric characters. For backwards compatibility, accept
        # such values as unsalted secrets.
        # It's easier to salt here and be consistent later, rather than add
        # different code paths in the checks, although that might be a tad more
        # efficient.
        return _salt_cipher_secret(token)
    # Any other length is treated as invalid and replaced by a fresh token.
    return _get_new_csrf_token()


def _compare_salted_tokens(request_csrf_token, csrf_token):
    """
    Return whether two salted tokens carry the same secret.

    Both arguments are assumed to be sanitized already -- strings of length
    CSRF_TOKEN_LENGTH made up of CSRF_ALLOWED_CHARS. Each token is unsalted
    first, and the resulting secrets are compared with
    constant_time_compare().
    """
    request_secret = _unsalt_cipher_token(request_csrf_token)
    expected_secret = _unsalt_cipher_token(csrf_token)
    return constant_time_compare(request_secret, expected_secret)


class CsrfViewMiddleware(MiddlewareMixin):
@@ -112,12 +166,17 @@ class CsrfViewMiddleware(MiddlewareMixin):
            return None

        try:
            csrf_token = _sanitize_token(
                request.COOKIES[settings.CSRF_COOKIE_NAME])
            # Use same token next time
            request.META['CSRF_COOKIE'] = csrf_token
            cookie_token = request.COOKIES[settings.CSRF_COOKIE_NAME]
        except KeyError:
            csrf_token = None
        else:
            csrf_token = _sanitize_token(cookie_token)
            if csrf_token != cookie_token:
                # Cookie token needed to be replaced;
                # the cookie needs to be reset.
                request.csrf_cookie_needs_reset = True
            # Use same token next time.
            request.META['CSRF_COOKIE'] = csrf_token

        # Wait until request.META["CSRF_COOKIE"] has been manipulated before
        # bailing out, so that get_token still works
@@ -142,7 +201,7 @@ class CsrfViewMiddleware(MiddlewareMixin):
                #
                # The attacker will need to provide a CSRF cookie and token, but
                # that's no problem for a MITM and the session-independent
                # nonce we're using. So the MITM can circumvent the CSRF
                # secret we're using. So the MITM can circumvent the CSRF
                # protection. This is true for any HTTP connection, but anyone
                # using HTTPS expects better! For this reason, for
                # https://example.com/ we need additional protection that treats
@@ -213,13 +272,15 @@ class CsrfViewMiddleware(MiddlewareMixin):
                # and possible for PUT/DELETE.
                request_csrf_token = request.META.get(settings.CSRF_HEADER_NAME, '')

            if not constant_time_compare(request_csrf_token, csrf_token):
            request_csrf_token = _sanitize_token(request_csrf_token)
            if not _compare_salted_tokens(request_csrf_token, csrf_token):
                return self._reject(request, REASON_BAD_TOKEN)

        return self._accept(request)

    def process_response(self, request, response):
        if getattr(response, 'csrf_processing_done', False):
        if not getattr(request, 'csrf_cookie_needs_reset', False):
            if getattr(response, 'csrf_cookie_set', False):
                return response

        if not request.META.get("CSRF_COOKIE_USED", False):
@@ -237,5 +298,5 @@ class CsrfViewMiddleware(MiddlewareMixin):
                            )
        # Content varies with the CSRF cookie, so set the Vary header.
        patch_vary_headers(response, ('Cookie',))
        response.csrf_processing_done = True
        response.csrf_cookie_set = True
        return response
+27 −19
Original line number Diff line number Diff line
@@ -218,20 +218,25 @@ How it works

The CSRF protection is based on the following things:

1. A CSRF cookie that is set to a random value (a session independent nonce, as
   it is called), which other sites will not have access to.
1. A CSRF cookie that is based on a random secret value, which other sites
   will not have access to.

   This cookie is set by ``CsrfViewMiddleware``.  It is meant to be permanent,
   but since there is no way to set a cookie that never expires, it is sent with
   every response that has called ``django.middleware.csrf.get_token()``
   (the function used internally to retrieve the CSRF token).
   This cookie is set by ``CsrfViewMiddleware``. It is sent with every
   response that has called ``django.middleware.csrf.get_token()`` (the
   function used internally to retrieve the CSRF token), if it wasn't already
   set on the request.

   For security reasons, the value of the CSRF cookie is changed each time a
   In order to protect against `BREACH`_ attacks, the token is not simply the
   secret; a random salt is prepended to the secret and used to scramble it.

   For security reasons, the value of the secret is changed each time a
   user logs in.

2. A hidden form field with the name 'csrfmiddlewaretoken' present in all
   outgoing POST forms.  The value of this field is the value of the CSRF
   cookie.
   outgoing POST forms. The value of this field is, again, the value of the
   secret, with a salt which is both added to it and used to scramble it. The
   salt is regenerated on every call to ``get_token()`` so that the form field
   value is changed in every such response.

   This part is done by the template tag.

@@ -239,6 +244,11 @@ The CSRF protection is based on the following things:
   TRACE, a CSRF cookie must be present, and the 'csrfmiddlewaretoken' field
   must be present and correct. If it isn't, the user will get a 403 error.

   When validating the 'csrfmiddlewaretoken' field value, only the secret,
   not the full token, is compared with the secret in the cookie value.
   This allows the use of ever-changing tokens. While each request may use its
   own token, the secret remains common to all.

   This check is done by ``CsrfViewMiddleware``.

4. In addition, for HTTPS requests, strict referer checking is done by
@@ -247,7 +257,7 @@ The CSRF protection is based on the following things:
   application since that request won't come from your own exact domain.

   This also addresses a man-in-the-middle attack that's possible under HTTPS
   when using a session independent nonce, due to the fact that HTTP
   when using a session independent secret, due to the fact that HTTP
   ``Set-Cookie`` headers are (unfortunately) accepted by clients even when
   they are talking to a site under HTTPS. (Referer checking is not done for
   HTTP requests because the presence of the ``Referer`` header isn't reliable
@@ -283,6 +293,13 @@ vulnerability allows and much worse).

    Checking against the :setting:`CSRF_COOKIE_DOMAIN` setting was added.

.. versionchanged:: 1.10

   Added salting to the token and started changing it with each request
   to protect against `BREACH`_ attacks.

.. _BREACH: http://breachattack.com/

Caching
=======

@@ -499,15 +516,6 @@ No, this is by design. Not linking CSRF protection to a session allows using
the protection on sites such as a `pastebin` that allow submissions from
anonymous users which don't have a session.

Why not use a new token for each request?
-----------------------------------------

Generating a new token for each request is problematic from a UI perspective
because it invalidates all previous forms. Most users would be very unhappy to
find that opening a new tab on your site has invalidated the form they had
just spent time filling out in another tab or that a form they accessed via
the back button could not be filled out.

Why might a user encounter a CSRF validation failure after logging in?
----------------------------------------------------------------------

+12 −7
Original line number Diff line number Diff line
@@ -118,13 +118,12 @@ GZip middleware
.. warning::

    Security researchers recently revealed that when compression techniques
    (including ``GZipMiddleware``) are used on a website, the site becomes
    exposed to a number of possible attacks. These approaches can be used to
    compromise, among other things, Django's CSRF protection. Before using
    ``GZipMiddleware`` on your site, you should consider very carefully whether
    you are subject to these attacks. If you're in *any* doubt about whether
    you're affected, you should avoid using ``GZipMiddleware``. For more
    details, see `the BREACH paper (PDF)`_ and `breachattack.com`_.
    (including ``GZipMiddleware``) are used on a website, the site may become
    exposed to a number of possible attacks. Before using ``GZipMiddleware`` on
    your site, you should consider very carefully whether you are subject to
    these attacks. If you're in *any* doubt about whether you're affected, you
    should avoid using ``GZipMiddleware``. For more details, see `the BREACH
    paper (PDF)`_ and `breachattack.com`_.

    .. _the BREACH paper (PDF): http://breachattack.com/resources/BREACH%20-%20SSL,%20gone%20in%2030%20seconds.pdf
    .. _breachattack.com: http://breachattack.com
@@ -147,6 +146,12 @@ It will NOT compress content if any of the following are true:
You can apply GZip compression to individual views using the
:func:`~django.views.decorators.gzip.gzip_page()` decorator.

.. versionchanged:: 1.10

    In older versions, Django's CSRF protection mechanism was vulnerable to
    BREACH attacks when compression was used. This is no longer the case, but
    you should still take care not to compromise your own secrets this way.

Conditional GET middleware
--------------------------

+12 −0
Original line number Diff line number Diff line
@@ -256,6 +256,12 @@ CSRF
  accepts an optional ``template_name`` parameter, defaulting to
  ``'403_csrf.html'``, to control the template used to render the page.

* To protect against `BREACH`_ attacks, the CSRF protection mechanism now
  changes the form token value on every request (while keeping an invariant
  secret which can be used to validate the different tokens).

.. _BREACH: http://breachattack.com/

Database backends
~~~~~~~~~~~~~~~~~

@@ -795,6 +801,12 @@ Miscellaneous
* ``utils.version.get_version()`` returns :pep:`440` compliant release
  candidate versions (e.g. '1.10rc1' instead of '1.10c1').

* CSRF token values are now required to be strings of 64 alphanumerics; values
  of 32 alphanumerics, as set by older versions of Django by default, are
  automatically replaced by strings of 64 characters. Other values are
  considered invalid. This should only affect developers or users who replace
  these tokens.

* The ``LOGOUT_URL`` setting is removed as Django hasn't made use of it
  since pre-1.0. If you use it in your project, you can add it to your
  project's settings. The default value was ``'/accounts/logout/'``.
+2 −2
Original line number Diff line number Diff line
@@ -65,10 +65,10 @@ this if you know what you are doing. There are other :ref:`limitations
<csrf-limitations>` if your site has subdomains that are outside of your
control.

:ref:`CSRF protection works <how-csrf-works>` by checking for a nonce in each
:ref:`CSRF protection works <how-csrf-works>` by checking for a secret in each
POST request. This ensures that a malicious user cannot simply "replay" a form
POST to your website and have another logged in user unwittingly submit that
form. The malicious user would have to know the nonce, which is user specific
form. The malicious user would have to know the secret, which is user specific
(using a cookie).

When deployed with :ref:`HTTPS <security-recommendation-ssl>`,
Loading