Commit 58bd2492 authored by Luke Plant
Browse files

[1.2.X] Fixed #717 - If-Modified-Since handling should compare dates according to RFC 2616

Thanks to Maniac for the report, julienb for the initial patch, and
especially to aaugustin for the final patch and tests.

Backport of [15696] from trunk.

git-svn-id: http://code.djangoproject.com/svn/django/branches/releases/1.2.X@15697 bcc190cf-cafb-0310-a4f2-bffc1f526a37
parent 8d3ee298
Loading
Loading
Loading
Loading
+11 −7
Original line number Diff line number Diff line
from django.core.exceptions import MiddlewareNotUsed
from django.utils.http import http_date
from django.utils.http import http_date, parse_http_date_safe

class ConditionalGetMiddleware(object):
    """
@@ -15,7 +15,7 @@ class ConditionalGetMiddleware(object):
            response['Content-Length'] = str(len(response.content))

        if response.has_header('ETag'):
            if_none_match = request.META.get('HTTP_IF_NONE_MATCH', None)
            if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
            if if_none_match == response['ETag']:
                # Setting the status is enough here. The response handling path
                # automatically removes content for this status code (in
@@ -23,8 +23,12 @@ class ConditionalGetMiddleware(object):
                response.status_code = 304

        if response.has_header('Last-Modified'):
            if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE', None)
            if if_modified_since == response['Last-Modified']:
            if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
            if if_modified_since is not None:
                if_modified_since = parse_http_date_safe(if_modified_since)
            if if_modified_since is not None:
                last_modified = parse_http_date_safe(response['Last-Modified'])
                if last_modified is not None and last_modified <= if_modified_since:
                    # Setting the status code is enough here (same reasons as
                    # above).
                    response.status_code = 304
+55 −0
Original line number Diff line number Diff line
import calendar
import datetime
import re
import sys
import urllib
@@ -8,6 +10,17 @@ from django.utils.functional import allow_lazy

ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"')

# Lower-case three-letter month abbreviations in calendar order; the position
# of a name in this list, plus one, is its month number.
MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split()
# Regex fragments with named groups, combined below into the full patterns.
# Day of month, exactly two digits.
__D = r'(?P<day>\d{2})'
# Day of month where the first character may be a space instead of a digit.
__D2 = r'(?P<day>[ \d]\d)'
# Month as three word characters (validated against MONTHS by the parser).
__M = r'(?P<mon>\w{3})'
# Four-digit year.
__Y = r'(?P<year>\d{4})'
# Two-digit year.
__Y2 = r'(?P<year>\d{2})'
# Time of day as HH:MM:SS.
__T = r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})'
# Matches dates such as 'Sun, 06 Nov 1994 08:49:37 GMT'.
RFC1123_DATE = re.compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T))
# Matches dates such as 'Sunday, 06-Nov-94 08:49:37 GMT'.
RFC850_DATE = re.compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T))
# Matches dates such as 'Sun Nov  6 08:49:37 1994' (no timezone suffix).
ASCTIME_DATE = re.compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y))

def urlquote(url, safe='/'):
    """
    A version of Python's urllib.quote() function that can operate on unicode
@@ -70,6 +83,48 @@ def http_date(epoch_seconds=None):
    rfcdate = formatdate(epoch_seconds)
    return '%s GMT' % rfcdate[:25]

def parse_http_date(date):
    """
    Parses a date format as specified by HTTP RFC2616 section 3.3.1.

    The three formats allowed by the RFC are accepted, even if only the first
    one is still in widespread use.

    Two-digit years are resolved as described in RFC2616 section 19.3: a date
    that would otherwise appear to be more than 50 years in the future is
    taken to belong to the previous century.

    Returns an integer expressed in seconds since the epoch, in UTC.

    Raises ValueError if the string matches none of the three formats, or if
    it matches but does not describe a valid date.
    """
    # email.Utils.parsedate does the job for RFC1123 dates; unfortunately
    # RFC2616 makes it mandatory to support RFC850 dates too. So we roll
    # our own RFC-compliant parsing.
    for regex in RFC1123_DATE, RFC850_DATE, ASCTIME_DATE:
        m = regex.match(date)
        if m is not None:
            break
    else:
        raise ValueError("%r is not in a valid HTTP date format" % date)
    try:
        year = int(m.group('year'))
        if year < 100:
            # Sliding window instead of a fixed pivot, so the interpretation
            # of two-digit years stays correct as time passes.
            current_year = datetime.datetime.utcnow().year
            current_century = current_year - (current_year % 100)
            if year - (current_year % 100) > 50:
                # More than 50 years ahead: it must refer to the past.
                year += current_century - 100
            else:
                year += current_century
        month = MONTHS.index(m.group('mon').lower()) + 1
        day = int(m.group('day'))
        hour = int(m.group('hour'))
        minute = int(m.group('min'))
        second = int(m.group('sec'))
        result = datetime.datetime(year, month, day, hour, minute, second)
        return calendar.timegm(result.utctimetuple())
    except Exception:
        # Unknown month name, out-of-range field, etc. Callers only need to
        # distinguish "valid" from "invalid", so normalise to ValueError.
        raise ValueError("%r is not a valid date" % date)

def parse_http_date_safe(date):
    """
    Lenient variant of parse_http_date: yields the parsed value on success
    and None whenever parsing fails for any reason.
    """
    try:
        parsed = parse_http_date(date)
    except Exception:
        # Deliberately broad: any failure simply means "no usable date".
        return None
    return parsed

# Base 36 functions: useful for generating compact URLs

def base36_to_int(s):
+10 −8
Original line number Diff line number Diff line
@@ -9,10 +9,9 @@ except ImportError:

from calendar import timegm
from datetime import timedelta
from email.Utils import formatdate

from django.utils.decorators import decorator_from_middleware, available_attrs
from django.utils.http import parse_etags, quote_etag
from django.utils.http import http_date, parse_http_date_safe, parse_etags, quote_etag
from django.middleware.http import ConditionalGetMiddleware
from django.http import HttpResponseNotAllowed, HttpResponseNotModified, HttpResponse

@@ -70,6 +69,8 @@ def condition(etag_func=None, last_modified_func=None):
        def inner(request, *args, **kwargs):
            # Get HTTP request headers
            if_modified_since = request.META.get("HTTP_IF_MODIFIED_SINCE")
            if if_modified_since:
                if_modified_since = parse_http_date_safe(if_modified_since)
            if_none_match = request.META.get("HTTP_IF_NONE_MATCH")
            if_match = request.META.get("HTTP_IF_MATCH")
            if if_none_match or if_match:
@@ -93,7 +94,7 @@ def condition(etag_func=None, last_modified_func=None):
            if last_modified_func:
                dt = last_modified_func(request, *args, **kwargs)
                if dt:
                    res_last_modified = formatdate(timegm(dt.utctimetuple()))[:26] + 'GMT'
                    res_last_modified = timegm(dt.utctimetuple())
                else:
                    res_last_modified = None
            else:
@@ -107,7 +108,8 @@ def condition(etag_func=None, last_modified_func=None):
                if ((if_none_match and (res_etag in etags or
                        "*" in etags and res_etag)) and
                        (not if_modified_since or
                            res_last_modified == if_modified_since)):
                            (res_last_modified and if_modified_since and
                            res_last_modified <= if_modified_since))):
                    if request.method in ("GET", "HEAD"):
                        response = HttpResponseNotModified()
                    else:
@@ -115,9 +117,9 @@ def condition(etag_func=None, last_modified_func=None):
                elif if_match and ((not res_etag and "*" in etags) or
                        (res_etag and res_etag not in etags)):
                    response = HttpResponse(status=412)
                elif (not if_none_match and if_modified_since and
                        request.method == "GET" and
                        res_last_modified == if_modified_since):
                elif (not if_none_match and request.method == "GET" and
                        res_last_modified and if_modified_since and
                        res_last_modified <= if_modified_since):
                    response = HttpResponseNotModified()

            if response is None:
@@ -125,7 +127,7 @@ def condition(etag_func=None, last_modified_func=None):

            # Set relevant headers on the response if they don't already exist.
            if res_last_modified and not response.has_header('Last-Modified'):
                response['Last-Modified'] = res_last_modified
                response['Last-Modified'] = http_date(res_last_modified)
            if res_etag and not response.has_header('ETag'):
                response['ETag'] = quote_etag(res_etag)

+2 −6
Original line number Diff line number Diff line
@@ -9,12 +9,11 @@ import posixpath
import re
import stat
import urllib
from email.Utils import parsedate_tz, mktime_tz

from django.template import loader
from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseNotModified
from django.template import Template, Context, TemplateDoesNotExist
from django.utils.http import http_date
from django.utils.http import http_date, parse_http_date

def serve(request, path, document_root=None, show_indexes=False):
    """
@@ -129,10 +128,7 @@ def was_modified_since(header=None, mtime=0, size=0):
            raise ValueError
        matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", header,
                           re.IGNORECASE)
        header_date = parsedate_tz(matches.group(1))
        if header_date is None:
            raise ValueError
        header_mtime = mktime_tz(header_date)
        header_mtime = parse_http_date(matches.group(1))
        header_len = matches.group(3)
        if header_len and int(header_len) != size:
            raise ValueError
+30 −4
Original line number Diff line number Diff line
# -*- coding:utf-8 -*-
from datetime import datetime, timedelta
from calendar import timegm
from datetime import datetime

from django.test import TestCase
from django.utils.http import parse_etags, quote_etag
from django.utils import unittest
from django.utils.http import parse_etags, quote_etag, parse_http_date

FULL_RESPONSE = 'Test conditional get response'
LAST_MODIFIED = datetime(2007, 10, 21, 23, 21, 47)
LAST_MODIFIED_STR = 'Sun, 21 Oct 2007 23:21:47 GMT'
LAST_MODIFIED_NEWER_STR = 'Mon, 18 Oct 2010 16:56:23 GMT'
LAST_MODIFIED_INVALID_STR = 'Mon, 32 Oct 2010 16:56:23 GMT'
EXPIRED_LAST_MODIFIED_STR = 'Sat, 20 Oct 2007 23:21:47 GMT'
ETAG = 'b4246ffc4f62314ca13147c9d4f76974'
EXPIRED_ETAG = '7fae4cd4b0f81e7d2914700043aa8ed6'


class ConditionalGet(TestCase):
    def assertFullResponse(self, response, check_last_modified=True, check_etag=True):
        self.assertEquals(response.status_code, 200)
@@ -33,6 +36,12 @@ class ConditionalGet(TestCase):
        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
        response = self.client.get('/condition/')
        self.assertNotModified(response)
        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_NEWER_STR
        response = self.client.get('/condition/')
        self.assertNotModified(response)
        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_INVALID_STR
        response = self.client.get('/condition/')
        self.assertFullResponse(response)
        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
        response = self.client.get('/condition/')
        self.assertFullResponse(response)
@@ -118,7 +127,7 @@ class ConditionalGet(TestCase):
        self.assertFullResponse(response, check_last_modified=False)


class ETagProcesing(TestCase):
class ETagProcessing(unittest.TestCase):
    def testParsing(self):
        etags = parse_etags(r'"", "etag", "e\"t\"ag", "e\\tag", W/"weak"')
        self.assertEquals(etags, ['', 'etag', 'e"t"ag', r'e\tag', 'weak'])
@@ -126,3 +135,20 @@ class ETagProcesing(TestCase):
    def testQuoting(self):
        quoted_etag = quote_etag(r'e\t"ag')
        self.assertEquals(quoted_etag, r'"e\\t\"ag"')


class HttpDateProcessing(unittest.TestCase):
    """
    Checks that parse_http_date understands each of the three date formats
    allowed by RFC 2616 section 3.3.1. Every test feeds the same instant
    (6 Nov 1994, 08:49:37 UTC) written in a different format.
    """

    def testParsingRfc1123(self):
        """RFC 1123 format: fixed-length, four-digit year, 'GMT' suffix."""
        parsed = parse_http_date('Sun, 06 Nov 1994 08:49:37 GMT')
        self.assertEqual(datetime.utcfromtimestamp(parsed),
                         datetime(1994, 11, 6, 8, 49, 37))

    def testParsingRfc850(self):
        """RFC 850 format: full weekday name and two-digit year."""
        parsed = parse_http_date('Sunday, 06-Nov-94 08:49:37 GMT')
        self.assertEqual(datetime.utcfromtimestamp(parsed),
                         datetime(1994, 11, 6, 8, 49, 37))

    def testParsingAsctime(self):
        """C asctime() format: space-padded day, year last, no timezone."""
        parsed = parse_http_date('Sun Nov  6 08:49:37 1994')
        self.assertEqual(datetime.utcfromtimestamp(parsed),
                         datetime(1994, 11, 6, 8, 49, 37))
Loading