Fixed #20869 -- made CSRF tokens change every request by salt-encrypting them (5112e65e) · Commits · Dom Sekotill / django

django/middleware/csrf.py

+84 −23

Original line number	Diff line number	Diff line
		@@ -8,6 +8,7 @@ from __future__ import unicode_literals

		import logging
		import re
		import string

		from django.conf import settings
		from django.urls import get_callable
		@@ -16,8 +17,10 @@ from django.utils.crypto import constant_time_compare, get_random_string
		from django.utils.deprecation import MiddlewareMixin
		from django.utils.encoding import force_text
		from django.utils.http import is_same_domain
		from django.utils.six.moves import zip
		from django.utils.six.moves.urllib.parse import urlparse


		logger = logging.getLogger('django.request')

		REASON_NO_REFERER = "Referer checking failed - no Referer."
		@@ -27,7 +30,9 @@ REASON_BAD_TOKEN = "CSRF token missing or incorrect."
		REASON_MALFORMED_REFERER = "Referer checking failed - Referer is malformed."
		REASON_INSECURE_REFERER = "Referer checking failed - Referer is insecure while host is secure."

		CSRF_KEY_LENGTH = 32
		CSRF_SECRET_LENGTH = 32
		CSRF_TOKEN_LENGTH = 2 * CSRF_SECRET_LENGTH
		CSRF_ALLOWED_CHARS = string.ascii_letters + string.digits


		def _get_failure_view():
		@@ -37,8 +42,38 @@ def _get_failure_view():
		return get_callable(settings.CSRF_FAILURE_VIEW)


		def _get_new_csrf_key():
		return get_random_string(CSRF_KEY_LENGTH)
		def _get_new_csrf_string():
		return get_random_string(CSRF_SECRET_LENGTH, allowed_chars=CSRF_ALLOWED_CHARS)


		def _salt_cipher_secret(secret):
		    """
		    Build a salted token from ``secret``.
		    A fresh random salt is drawn from ``_get_new_csrf_string()``. Each
		    character of the secret is shifted, within CSRF_ALLOWED_CHARS, by the
		    position of the matching salt character (wrapping around the alphabet).
		    The result is the salt followed by the shifted characters. ``secret`` is
		    assumed to consist only of CSRF_ALLOWED_CHARS.
		    """
		    alphabet = CSRF_ALLOWED_CHARS
		    salt = _get_new_csrf_string()
		    # Keep the lookups lazy so they are consumed pairwise by zip().
		    secret_positions = (alphabet.index(char) for char in secret)
		    salt_positions = (alphabet.index(char) for char in salt)
		    scrambled = []
		    for secret_pos, salt_pos in zip(secret_positions, salt_positions):
		        scrambled.append(alphabet[(secret_pos + salt_pos) % len(alphabet)])
		    return salt + ''.join(scrambled)


		def _unsalt_cipher_token(token):
		    """
		    Recover the secret from a salted ``token``.
		    The first CSRF_SECRET_LENGTH characters of the token are taken as the
		    salt and the remainder as the scrambled secret. Each scrambled character
		    is shifted back, within CSRF_ALLOWED_CHARS, by the position of the
		    matching salt character. ``token`` is assumed to consist only of
		    CSRF_ALLOWED_CHARS and to be CSRF_TOKEN_LENGTH characters long.
		    """
		    alphabet = CSRF_ALLOWED_CHARS
		    salt_part = token[:CSRF_SECRET_LENGTH]
		    cipher_part = token[CSRF_SECRET_LENGTH:]
		    # Keep the lookups lazy so they are consumed pairwise by zip().
		    cipher_positions = (alphabet.index(char) for char in cipher_part)
		    salt_positions = (alphabet.index(char) for char in salt_part)
		    recovered = []
		    for cipher_pos, salt_pos in zip(cipher_positions, salt_positions):
		        # A negative difference is fine: negative indexing wraps around
		        # the alphabet, which undoes the modular shift.
		        recovered.append(alphabet[cipher_pos - salt_pos])
		    return ''.join(recovered)


		def _get_new_csrf_token():
		    """
		    Return a brand-new salted token wrapping a freshly generated secret.
		    """
		    fresh_secret = _get_new_csrf_string()
		    return _salt_cipher_secret(fresh_secret)


		def get_token(request):
		@@ -52,9 +87,12 @@ def get_token(request):
		function lazily, as is done by the csrf context processor.
		"""
		if "CSRF_COOKIE" not in request.META:
		request.META["CSRF_COOKIE"] = _get_new_csrf_key()
		csrf_secret = _get_new_csrf_string()
		request.META["CSRF_COOKIE"] = _salt_cipher_secret(csrf_secret)
		else:
		csrf_secret = _unsalt_cipher_token(request.META["CSRF_COOKIE"])
		request.META["CSRF_COOKIE_USED"] = True
		return request.META["CSRF_COOKIE"]
		return _salt_cipher_secret(csrf_secret)


		def rotate_token(request):
		@@ -64,19 +102,35 @@ def rotate_token(request):
		"""
		request.META.update({
		"CSRF_COOKIE_USED": True,
		"CSRF_COOKIE": _get_new_csrf_key(),
		"CSRF_COOKIE": _get_new_csrf_token(),
		})
		request.csrf_cookie_needs_reset = True


		def _sanitize_token(token):
		# Allow only alphanum
		if len(token) > CSRF_KEY_LENGTH:
		return _get_new_csrf_key()
		token = re.sub('[^a-zA-Z0-9]+', '', force_text(token))
		if token == "":
		# In case the cookie has been truncated to nothing at some point.
		return _get_new_csrf_key()
		# Allow only ASCII alphanumerics
		if re.search('[^a-zA-Z0-9]', force_text(token)):
		return _get_new_csrf_token()
		elif len(token) == CSRF_TOKEN_LENGTH:
		return token
		elif len(token) == CSRF_SECRET_LENGTH:
		# Older Django versions set cookies to values of CSRF_SECRET_LENGTH
		# alphanumeric characters. For backwards compatibility, accept
		# such values as unsalted secrets.
		# It's easier to salt here and be consistent later, rather than add
		# different code paths in the checks, although that might be a tad more
		# efficient.
		return _salt_cipher_secret(token)
		return _get_new_csrf_token()


		def _compare_salted_tokens(request_csrf_token, csrf_token):
		    """
		    Return whether two salted tokens carry the same secret.
		    Both arguments are assumed to be sanitized already: strings of length
		    CSRF_TOKEN_LENGTH made up only of CSRF_ALLOWED_CHARS. Each token is
		    unsalted and the recovered secrets are compared with
		    ``constant_time_compare``.
		    """
		    request_secret = _unsalt_cipher_token(request_csrf_token)
		    cookie_secret = _unsalt_cipher_token(csrf_token)
		    return constant_time_compare(request_secret, cookie_secret)


		class CsrfViewMiddleware(MiddlewareMixin):
		@@ -112,12 +166,17 @@ class CsrfViewMiddleware(MiddlewareMixin):
		return None

		try:
		csrf_token = _sanitize_token(
		request.COOKIES[settings.CSRF_COOKIE_NAME])
		# Use same token next time
		request.META['CSRF_COOKIE'] = csrf_token
		cookie_token = request.COOKIES[settings.CSRF_COOKIE_NAME]
		except KeyError:
		csrf_token = None
		else:
		csrf_token = _sanitize_token(cookie_token)
		if csrf_token != cookie_token:
		# Cookie token needed to be replaced;
		# the cookie needs to be reset.
		request.csrf_cookie_needs_reset = True
		# Use same token next time.
		request.META['CSRF_COOKIE'] = csrf_token

		# Wait until request.META["CSRF_COOKIE"] has been manipulated before
		# bailing out, so that get_token still works
		@@ -142,7 +201,7 @@ class CsrfViewMiddleware(MiddlewareMixin):
		#
		# The attacker will need to provide a CSRF cookie and token, but
		# that's no problem for a MITM and the session-independent
		# nonce we're using. So the MITM can circumvent the CSRF
		# secret we're using. So the MITM can circumvent the CSRF
		# protection. This is true for any HTTP connection, but anyone
		# using HTTPS expects better! For this reason, for
		# https://example.com/ we need additional protection that treats
		@@ -213,13 +272,15 @@ class CsrfViewMiddleware(MiddlewareMixin):
		# and possible for PUT/DELETE.
		request_csrf_token = request.META.get(settings.CSRF_HEADER_NAME, '')

		if not constant_time_compare(request_csrf_token, csrf_token):
		request_csrf_token = _sanitize_token(request_csrf_token)
		if not _compare_salted_tokens(request_csrf_token, csrf_token):
		return self._reject(request, REASON_BAD_TOKEN)

		return self._accept(request)

		def process_response(self, request, response):
		if getattr(response, 'csrf_processing_done', False):
		if not getattr(request, 'csrf_cookie_needs_reset', False):
		if getattr(response, 'csrf_cookie_set', False):
		return response

		if not request.META.get("CSRF_COOKIE_USED", False):
		@@ -237,5 +298,5 @@ class CsrfViewMiddleware(MiddlewareMixin):
		)
		# Content varies with the CSRF cookie, so set the Vary header.
		patch_vary_headers(response, ('Cookie',))
		response.csrf_processing_done = True
		response.csrf_cookie_set = True
		return response

docs/ref/csrf.txt

+27 −19

Original line number	Diff line number	Diff line
		@@ -218,20 +218,25 @@ How it works

		The CSRF protection is based on the following things:

		1. A CSRF cookie that is set to a random value (a session independent nonce, as
		it is called), which other sites will not have access to.
		1. A CSRF cookie that is based on a random secret value, which other sites
		will not have access to.

		This cookie is set by ``CsrfViewMiddleware``. It is meant to be permanent,
		but since there is no way to set a cookie that never expires, it is sent with
		every response that has called ``django.middleware.csrf.get_token()``
		(the function used internally to retrieve the CSRF token).
		This cookie is set by ``CsrfViewMiddleware``. It is sent with every
		response that has called ``django.middleware.csrf.get_token()`` (the
		function used internally to retrieve the CSRF token), if it wasn't already
		set on the request.

		For security reasons, the value of the CSRF cookie is changed each time a
		In order to protect against `BREACH`_ attacks, the token is not simply the
		secret; a random salt is prepended to the secret and used to scramble it.

		For security reasons, the value of the secret is changed each time a
		user logs in.

		2. A hidden form field with the name 'csrfmiddlewaretoken' present in all
		outgoing POST forms. The value of this field is the value of the CSRF
		cookie.
		outgoing POST forms. The value of this field is, again, the value of the
		secret, with a salt which is both added to it and used to scramble it. The
		salt is regenerated on every call to ``get_token()`` so that the form field
		value is changed in every such response.

		This part is done by the template tag.

		@@ -239,6 +244,11 @@ The CSRF protection is based on the following things:
		TRACE, a CSRF cookie must be present, and the 'csrfmiddlewaretoken' field
		must be present and correct. If it isn't, the user will get a 403 error.

		When validating the 'csrfmiddlewaretoken' field value, only the secret,
		not the full token, is compared with the secret in the cookie value.
		This allows the use of ever-changing tokens. While each request may use its
		own token, the secret remains common to all.

		This check is done by ``CsrfViewMiddleware``.

		4. In addition, for HTTPS requests, strict referer checking is done by
		@@ -247,7 +257,7 @@ The CSRF protection is based on the following things:
		application since that request won't come from your own exact domain.

		This also addresses a man-in-the-middle attack that's possible under HTTPS
		when using a session independent nonce, due to the fact that HTTP
		when using a session independent secret, due to the fact that HTTP
		``Set-Cookie`` headers are (unfortunately) accepted by clients even when
		they are talking to a site under HTTPS. (Referer checking is not done for
		HTTP requests because the presence of the ``Referer`` header isn't reliable
		@@ -283,6 +293,13 @@ vulnerability allows and much worse).

		Checking against the :setting:`CSRF_COOKIE_DOMAIN` setting was added.

		.. versionchanged:: 1.10

		Added salting to the token and started changing it with each request
		to protect against `BREACH`_ attacks.

		.. _BREACH: http://breachattack.com/

		Caching
		=======

		@@ -499,15 +516,6 @@ No, this is by design. Not linking CSRF protection to a session allows using
		the protection on sites such as a `pastebin` that allow submissions from
		anonymous users which don't have a session.

		Why not use a new token for each request?
		-----------------------------------------

		Generating a new token for each request is problematic from a UI perspective
		because it invalidates all previous forms. Most users would be very unhappy to
		find that opening a new tab on your site has invalidated the form they had
		just spent time filling out in another tab or that a form they accessed via
		the back button could not be filled out.

		Why might a user encounter a CSRF validation failure after logging in?
		----------------------------------------------------------------------

docs/ref/middleware.txt

+12 −7

Original line number	Diff line number	Diff line
		@@ -118,13 +118,12 @@ GZip middleware
		.. warning::

		Security researchers recently revealed that when compression techniques
		(including ``GZipMiddleware``) are used on a website, the site becomes
		exposed to a number of possible attacks. These approaches can be used to
		compromise, among other things, Django's CSRF protection. Before using
		``GZipMiddleware`` on your site, you should consider very carefully whether
		you are subject to these attacks. If you're in any doubt about whether
		you're affected, you should avoid using ``GZipMiddleware``. For more
		details, see the `the BREACH paper (PDF)`_ and `breachattack.com`_.
		(including ``GZipMiddleware``) are used on a website, the site may become
		exposed to a number of possible attacks. Before using ``GZipMiddleware`` on
		your site, you should consider very carefully whether you are subject to
		these attacks. If you're in any doubt about whether you're affected, you
		should avoid using ``GZipMiddleware``. For more details, see `the BREACH
		paper (PDF)`_ and `breachattack.com`_.

		.. _the BREACH paper (PDF): http://breachattack.com/resources/BREACH%20-%20SSL,%20gone%20in%2030%20seconds.pdf
		.. _breachattack.com: http://breachattack.com
		@@ -147,6 +146,12 @@ It will NOT compress content if any of the following are true:
		You can apply GZip compression to individual views using the
		:func:`~django.views.decorators.gzip.gzip_page()` decorator.

		.. versionchanged:: 1.10

		In older versions, Django's CSRF protection mechanism was vulnerable to
		BREACH attacks when compression was used. This is no longer the case, but
		you should still take care not to compromise your own secrets this way.

		Conditional GET middleware
		--------------------------

docs/releases/1.10.txt

+12 −0

Original line number	Diff line number	Diff line
		@@ -256,6 +256,12 @@ CSRF
		accepts an optional ``template_name`` parameter, defaulting to
		``'403_csrf.html'``, to control the template used to render the page.

		* To protect against `BREACH`_ attacks, the CSRF protection mechanism now
		changes the form token value on every request (while keeping an invariant
		secret which can be used to validate the different tokens).

		.. _BREACH: http://breachattack.com/

		Database backends
		~~~~~~~~~~~~~~~~~

		@@ -795,6 +801,12 @@ Miscellaneous
		* ``utils.version.get_version()`` returns :pep:`440` compliant release
		candidate versions (e.g. '1.10rc1' instead of '1.10c1').

		* CSRF token values are now required to be strings of 64 alphanumeric
		characters; values of 32 alphanumeric characters, as set by older versions
		of Django by default, are
		automatically replaced by strings of 64 characters. Other values are
		considered invalid. This should only affect developers or users who replace
		these tokens.

		* The ``LOGOUT_URL`` setting is removed as Django hasn't made use of it
		since pre-1.0. If you use it in your project, you can add it to your
		project's settings. The default value was ``'/accounts/logout/'``.

docs/topics/security.txt

+2 −2

Original line number	Diff line number	Diff line
		@@ -65,10 +65,10 @@ this if you know what you are doing. There are other :ref:`limitations
		<csrf-limitations>` if your site has subdomains that are outside of your
		control.

		:ref:`CSRF protection works <how-csrf-works>` by checking for a nonce in each
		:ref:`CSRF protection works <how-csrf-works>` by checking for a secret in each
		POST request. This ensures that a malicious user cannot simply "replay" a form
		POST to your website and have another logged in user unwittingly submit that
		form. The malicious user would have to know the nonce, which is user specific
		form. The malicious user would have to know the secret, which is user-specific
		(using a cookie).

		When deployed with :ref:`HTTPS <security-recommendation-ssl>`,