Fixed #9886 -- Added a file-like interface to HttpRequest. Thanks to Ivan... (269e9217) · Commits · Dom Sekotill / django

django/core/handlers/modpython.py

+2 −28

Original line number	Diff line number	Diff line
		@@ -42,6 +42,8 @@ class ModPythonRequest(http.HttpRequest):
		# naughty, but also pretty harmless.
		self.path_info = u'/'
		self._post_parse_error = False
		self._stream = self._req
		self._read_started = False

		def __repr__(self):
		# Since this is called as part of error handling, we need to be very
		@@ -81,26 +83,6 @@ class ModPythonRequest(http.HttpRequest):
		# mod_python < 3.2.10 doesn't have req.is_https().
		return self._req.subprocess_env.get('HTTPS', '').lower() in ('on', '1')

		def _load_post_and_files(self):
		    """Fill in ``self._post`` and ``self._files`` for this mod_python request.

		    Non-POST requests get an empty QueryDict and an empty MultiValueDict.
		    A POST whose ``content-type`` header starts with ``multipart`` is
		    handed to ``parse_file_upload``; any other POST is parsed from
		    ``raw_post_data``.  If the multipart parse raises, empty values are
		    stored, ``_post_parse_error`` is set and the exception is re-raised.
		    """
		    if self.method != 'POST':
		        self._post, self._files = http.QueryDict('', encoding=self._encoding), datastructures.MultiValueDict()
		        return

		    headers = self._req.headers_in
		    is_multipart = 'content-type' in headers and headers['content-type'].startswith('multipart')
		    if not is_multipart:
		        # Plain form submission: decode the raw body as a query string.
		        self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
		        return

		    # The body is consumed by the upload parser, so there is no raw copy.
		    self._raw_post_data = ''
		    try:
		        self._post, self._files = self.parse_file_upload(self.META, self._req)
		    except:
		        # Leave empty placeholders and flag the failure before letting the
		        # original exception propagate to the caller.
		        self._post = http.QueryDict('')
		        self._files = datastructures.MultiValueDict()
		        self._post_parse_error = True
		        raise

		def _get_request(self):
		if not hasattr(self, '_request'):
		self._request = datastructures.MergeDict(self.POST, self.GET)
		@@ -162,13 +144,6 @@ class ModPythonRequest(http.HttpRequest):
		self._meta[key] = value
		return self._meta

		def _get_raw_post_data(self):
		    """Return the request body, reading it from ``self._req`` on first use.

		    The value is cached on ``self._raw_post_data`` so the underlying
		    request object is read at most once.
		    """
		    try:
		        body = self._raw_post_data
		    except AttributeError:
		        # First access: pull the whole body and remember it.
		        body = self._raw_post_data = self._req.read()
		    return body

		def _get_method(self):
		    """Return ``META['REQUEST_METHOD']`` converted to upper case."""
		    verb = self.META['REQUEST_METHOD']
		    return verb.upper()

		@@ -178,7 +153,6 @@ class ModPythonRequest(http.HttpRequest):
		FILES = property(_get_files)
		META = property(_get_meta)
		REQUEST = property(_get_request)
		raw_post_data = property(_get_raw_post_data)
		method = property(_get_method)

		class ModPythonHandler(BaseHandler):

django/core/handlers/wsgi.py

+68 −59

Original line number	Diff line number	Diff line
		@@ -5,6 +5,7 @@ try:
		from cStringIO import StringIO
		except ImportError:
		from StringIO import StringIO
		import socket

		from django import http
		from django.core import signals
		@@ -62,20 +63,55 @@ STATUS_CODE_TEXT = {
		505: 'HTTP VERSION NOT SUPPORTED',
		}

		def safe_copyfileobj(fsrc, fdst, length=16*1024, size=0):
		    """
		    A version of shutil.copyfileobj that will not read more than 'size' bytes.
		    This makes it safe from clients sending more than CONTENT_LENGTH bytes of
		    data in the body.

		    Data is copied from ``fsrc`` to ``fdst`` in chunks of at most ``length``.
		    Nothing is copied when ``size`` is zero or otherwise falsy; copying also
		    stops early if ``fsrc`` returns no data.
		    """
		    if not size:
		        return
		    while size > 0:
		        buf = fsrc.read(min(length, size))
		        if not buf:
		            break
		        fdst.write(buf)
		        size -= len(buf)

		class LimitedStream(object):
		    """
		    LimitedStream wraps another stream in order to not allow reading from it
		    past specified amount of bytes.

		    ``remaining`` tracks how many bytes may still be pulled from the wrapped
		    stream; ``buffer`` holds data already pulled but not yet handed out.
		    ``buf_size`` is stored on the instance but is not consulted by any method
		    of this class.
		    """
		    def __init__(self, stream, limit, buf_size=64 * 1024 * 1024):
		        self.stream = stream
		        self.remaining = limit
		        self.buffer = ''
		        self.buf_size = buf_size

		    def _read_limited(self, size=None):
		        """Read up to ``size`` bytes from the wrapped stream, never past the limit.

		        ``None`` or a negative ``size`` means "everything still allowed".
		        """
		        # A negative size must never reach stream.read(): file objects treat
		        # it as "read everything", which would bypass the limit.
		        if size is None or size < 0 or size > self.remaining:
		            size = self.remaining
		        if size <= 0:
		            return ''
		        result = self.stream.read(size)
		        self.remaining -= len(result)
		        return result

		    def read(self, size=None):
		        """Return up to ``size`` bytes; ``None`` or negative reads to the limit."""
		        if size is None or size < 0:
		            result = self.buffer + self._read_limited()
		            self.buffer = ''
		        elif size < len(self.buffer):
		            result = self.buffer[:size]
		            self.buffer = self.buffer[size:]
		        else: # size >= len(self.buffer)
		            result = self.buffer + self._read_limited(size - len(self.buffer))
		            self.buffer = ''
		        return result

		    def readline(self, size=None):
		        """Return the next line, at most ``size`` bytes long when ``size`` is given.

		        A falsy or negative ``size`` means no length cap.
		        """
		        max_len = size if size and size > 0 else None
		        # Keep pulling data only while no newline is buffered AND the buffer
		        # is still shorter than the requested cap.  (Using "or" here would
		        # keep reading after a newline was found and could compute a
		        # negative read size once the buffer exceeded the cap.)
		        while '\n' not in self.buffer and \
		                (max_len is None or len(self.buffer) < max_len):
		            if max_len is None:
		                chunk = self._read_limited()
		            else:
		                chunk = self._read_limited(max_len - len(self.buffer))
		            if not chunk:
		                break
		            self.buffer += chunk
		        # Cut after the first newline, or take everything if there is none.
		        end = self.buffer.find('\n') + 1
		        if end == 0:
		            end = len(self.buffer)
		        if max_len is not None and end > max_len:
		            end = max_len
		        line = self.buffer[:end]
		        self.buffer = self.buffer[end:]
		        return line

		class WSGIRequest(http.HttpRequest):
		def __init__(self, environ):
		@@ -98,6 +134,24 @@ class WSGIRequest(http.HttpRequest):
		self.META['SCRIPT_NAME'] = script_name
		self.method = environ['REQUEST_METHOD'].upper()
		self._post_parse_error = False
		if isinstance(self.environ['wsgi.input'], socket._fileobject):
		# Under development server 'wsgi.input' is an instance of
		# socket._fileobject which hangs indefinitely on reading bytes past
		# available count. To prevent this it's wrapped in LimitedStream
		# that doesn't read past Content-Length bytes.
		#
		# This is not done for other kinds of inputs (like flup's FastCGI
		# streams) because they don't suffer from this problem and we can
		# avoid using another wrapper with its own .read and .readline
		# implementation.
		try:
		content_length = int(self.environ.get('CONTENT_LENGTH', 0))
		except (ValueError, TypeError):
		content_length = 0
		self._stream = LimitedStream(self.environ['wsgi.input'], content_length)
		else:
		self._stream = self.environ['wsgi.input']
		self._read_started = False

		def __repr__(self):
		# Since this is called as part of error handling, we need to be very
		@@ -133,30 +187,6 @@ class WSGIRequest(http.HttpRequest):
		return 'wsgi.url_scheme' in self.environ \
		and self.environ['wsgi.url_scheme'] == 'https'

		def _load_post_and_files(self):
		    """Populate ``self._post`` and ``self._files`` from the WSGI environ.

		    Non-POST requests get empty containers.  A POST whose ``CONTENT_TYPE``
		    starts with ``multipart`` is parsed by ``parse_file_upload`` straight
		    from ``wsgi.input``; any other POST is parsed from ``raw_post_data``.
		    """
		    if self.method != 'POST':
		        self._post, self._files = http.QueryDict('', encoding=self._encoding), datastructures.MultiValueDict()
		        return

		    content_type = self.environ.get('CONTENT_TYPE', '')
		    if not content_type.startswith('multipart'):
		        self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
		        return

		    self._raw_post_data = ''
		    try:
		        self._post, self._files = self.parse_file_upload(self.META, self.environ['wsgi.input'])
		    except:
		        # Parsing the POST data failed.  The request handler may touch
		        # self.POST while formatting the error, so store empty values
		        # to stop the data from being parsed a second time.
		        self._post = http.QueryDict('')
		        self._files = datastructures.MultiValueDict()
		        # Record the failure so self.__repr__ can report it explicitly
		        # rather than just showing an empty POST.
		        self._post_parse_error = True
		        raise

		def _get_request(self):
		if not hasattr(self, '_request'):
		self._request = datastructures.MergeDict(self.POST, self.GET)
		@@ -192,32 +222,11 @@ class WSGIRequest(http.HttpRequest):
		self._load_post_and_files()
		return self._files

		def _get_raw_post_data(self):
		    """Return the request body, reading at most CONTENT_LENGTH bytes once.

		    The result is cached on ``self._raw_post_data``.
		    """
		    try:
		        return self._raw_post_data
		    except AttributeError:
		        pass

		    sink = StringIO()
		    try:
		        # CONTENT_LENGTH can be missing when a POST has no content at all
		        # (seen with lighttpd).
		        content_length = int(self.environ.get('CONTENT_LENGTH', 0))
		    except (ValueError, TypeError):
		        # An empty string or other non-integer must not blow up.  None has
		        # also been observed here (against the specs, see ticket #8259),
		        # hence TypeError is handled too.
		        content_length = 0
		    if content_length > 0:
		        safe_copyfileobj(self.environ['wsgi.input'], sink,
		                         size=content_length)
		    self._raw_post_data = sink.getvalue()
		    sink.close()
		    return self._raw_post_data

		GET = property(_get_get, _set_get)
		POST = property(_get_post, _set_post)
		COOKIES = property(_get_cookies, _set_cookies)
		FILES = property(_get_files)
		REQUEST = property(_get_request)
		raw_post_data = property(_get_raw_post_data)

		class WSGIHandler(base.BaseHandler):
		initLock = Lock()

django/http/__init__.py

+74 −3

Original line number	Diff line number	Diff line
		@@ -6,6 +6,10 @@ from Cookie import BaseCookie, SimpleCookie, CookieError
		from pprint import pformat
		from urllib import urlencode
		from urlparse import urljoin
		try:
		from cStringIO import StringIO
		except ImportError:
		from StringIO import StringIO
		try:
		# The mod_python version is more efficient, so try importing it first.
		from mod_python.util import parse_qsl
		@@ -132,6 +136,73 @@ class HttpRequest(object):
		parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
		return parser.parse()

		def _get_raw_post_data(self):
		    """Return the whole request body, reading it on first access.

		    After the first access the body is cached on ``self._raw_post_data``
		    and ``self._stream`` is replaced by a StringIO over that data.

		    Raises Exception if the stream was already read from before the body
		    was cached.
		    """
		    if hasattr(self, '_raw_post_data'):
		        return self._raw_post_data
		    if self._read_started:
		        raise Exception("You cannot access raw_post_data after reading from request's data stream")
		    body = self.read()
		    self._raw_post_data = body
		    # Point the stream at the cached copy so the data can still be read.
		    self._stream = StringIO(body)
		    return self._raw_post_data
		raw_post_data = property(_get_raw_post_data)

		def _mark_post_parse_error(self):
		    """Store empty POST/FILES containers and set ``_post_parse_error``."""
		    self._post, self._files = QueryDict(''), MultiValueDict()
		    self._post_parse_error = True

		def _load_post_and_files(self):
		    """Populate ``self._post`` and ``self._files`` from the request body.

		    Non-POST requests get empty containers.  If the data stream has
		    already been read from, the request is marked with a POST parse error
		    instead.  Multipart bodies go through ``parse_file_upload`` with the
		    request itself as the data source; anything else is parsed from
		    ``raw_post_data``.
		    """
		    if self.method != 'POST':
		        self._post, self._files = QueryDict('', encoding=self._encoding), MultiValueDict()
		        return
		    if self._read_started:
		        self._mark_post_parse_error()
		        return

		    content_type = self.META.get('CONTENT_TYPE', '')
		    if not content_type.startswith('multipart'):
		        self._post, self._files = QueryDict(self.raw_post_data, encoding=self._encoding), MultiValueDict()
		        return

		    self._raw_post_data = ''
		    try:
		        self._post, self._files = self.parse_file_upload(self.META, self)
		    except:
		        # Parsing the POST data failed.  The request handler may touch
		        # self.POST while formatting the error, so empty values are
		        # stored to stop the data from being parsed again, and the
		        # failure is flagged so self.__repr__ can report it explicitly
		        # rather than showing an empty POST.
		        self._mark_post_parse_error()
		        raise

		## File-like and iterator interface.
		##
		## Expects self._stream to be set to an appropriate source of bytes by
		## a corresponding request subclass (WSGIRequest or ModPythonRequest).
		## Also when request data has already been read by request.POST or
		## request.raw_post_data, self._stream points to a StringIO instance
		## containing that data.

		def read(self, *args, **kwargs):
		    """Read from the request's data stream.

		    All positional and keyword arguments are forwarded unchanged to
		    ``self._stream.read``.  Calling this sets ``self._read_started``.
		    """
		    self._read_started = True
		    return self._stream.read(*args, **kwargs)

		def readline(self, *args, **kwargs):
		    """Read one line from the request's data stream.

		    All positional and keyword arguments are forwarded unchanged to
		    ``self._stream.readline``.  Calling this sets ``self._read_started``.
		    """
		    self._read_started = True
		    return self._stream.readline(*args, **kwargs)

		def xreadlines(self):
		    """Yield lines from ``self.readline()`` until it returns an empty value."""
		    line = self.readline()
		    while line:
		        yield line
		        line = self.readline()
		__iter__ = xreadlines

		def readlines(self):
		    """Return a list of everything produced by iterating over ``self``."""
		    return list(self)

		class QueryDict(MultiValueDict):
		"""
		A specialized MultiValueDict that takes a query string when initialized.

docs/ref/request-response.txt

+29 −2

Original line number	Diff line number	Diff line
		@@ -189,8 +189,14 @@ All attributes except ``session`` should be considered read-only.

		.. attribute:: HttpRequest.raw_post_data

		The raw HTTP POST data. This is only useful for advanced processing. Use
		``POST`` instead.
		The raw HTTP POST data as a byte string. This is useful for processing
		data in formats other than conventional HTML forms: binary images,
		XML payloads, etc. For processing form data use ``HttpRequest.POST``.

		.. versionadded:: 1.3

		You can also read from an HttpRequest using a file-like interface. See
		:meth:`HttpRequest.read()`.

		.. attribute:: HttpRequest.urlconf

		@@ -249,6 +255,27 @@ Methods
		If you write your own XMLHttpRequest call (on the browser side), you'll
		have to set this header manually if you want ``is_ajax()`` to work.

		.. method:: HttpRequest.read(size=None)
		.. method:: HttpRequest.readline()
		.. method:: HttpRequest.readlines()
		.. method:: HttpRequest.xreadlines()
		.. method:: HttpRequest.__iter__()

		.. versionadded:: 1.3

		Methods implementing a file-like interface for reading from an
		HttpRequest instance. This makes it possible to consume an incoming
		request in a streaming fashion. A common use case would be to process a
		big XML payload with an iterative parser without constructing a whole
		XML tree in memory.

		Given this standard interface, an HttpRequest instance can be
		passed directly to an XML parser such as ElementTree::

		import xml.etree.ElementTree as ET
		for element in ET.iterparse(request):
		process(element)


		QueryDict objects
		-----------------

tests/regressiontests/requests/tests.py

+63 −3

Original line number	Diff line number	Diff line
		from datetime import datetime, timedelta
		import time
		from StringIO import StringIO
		import unittest

		from django.http import HttpRequest, HttpResponse, parse_cookie
		from django.core.handlers.wsgi import WSGIRequest
		from django.core.handlers.wsgi import WSGIRequest, LimitedStream
		from django.core.handlers.modpython import ModPythonRequest
		from django.utils.http import cookie_date

		@@ -17,11 +18,11 @@ class RequestsTests(unittest.TestCase):
		self.assertEqual(request.META.keys(), [])

		def test_wsgirequest(self):
		request = WSGIRequest({'PATH_INFO': 'bogus', 'REQUEST_METHOD': 'bogus'})
		request = WSGIRequest({'PATH_INFO': 'bogus', 'REQUEST_METHOD': 'bogus', 'wsgi.input': StringIO('')})
		self.assertEqual(request.GET.keys(), [])
		self.assertEqual(request.POST.keys(), [])
		self.assertEqual(request.COOKIES.keys(), [])
		self.assertEqual(set(request.META.keys()), set(['PATH_INFO', 'REQUEST_METHOD', 'SCRIPT_NAME']))
		self.assertEqual(set(request.META.keys()), set(['PATH_INFO', 'REQUEST_METHOD', 'SCRIPT_NAME', 'wsgi.input']))
		self.assertEqual(request.META['PATH_INFO'], 'bogus')
		self.assertEqual(request.META['REQUEST_METHOD'], 'bogus')
		self.assertEqual(request.META['SCRIPT_NAME'], '')
		@@ -88,3 +89,62 @@ class RequestsTests(unittest.TestCase):
		max_age_cookie = response.cookies['max_age']
		self.assertEqual(max_age_cookie['max-age'], 10)
		self.assertEqual(max_age_cookie['expires'], cookie_date(time.time()+10))

		def test_limited_stream(self):
		    """LimitedStream never returns data beyond its byte limit."""
		    # An unbounded read stops at the limit.
		    limited = LimitedStream(StringIO('test'), 2)
		    self.assertEqual(limited.read(), 'te')

		    # Asking for more than the limit allows still stops at the limit.
		    limited = LimitedStream(StringIO('test'), 2)
		    self.assertEqual(limited.read(5), 'te')

		    # Consecutive sized reads pick up where the previous one stopped.
		    limited = LimitedStream(StringIO('12345678'), 8)
		    self.assertEqual(limited.read(5), '12345')
		    self.assertEqual(limited.read(5), '678')

		    # Line-oriented reads, as (readline arguments, expected result) pairs.
		    limited = LimitedStream(StringIO('1234\n5678\nabcd\nefgh\nijkl'), 24)
		    readline_cases = (
		        ((), '1234\n'),      # a full line, no size given
		        ((2,), '56'),        # fewer characters than the line holds
		        ((), '78\n'),        # the remainder of that partial line
		        ((6,), 'abcd\n'),    # size larger than the line length
		        ((4,), 'efgh'),      # size stops exactly before the line end
		        ((), '\n'),          # only the line end is left
		        ((), 'ijkl'),        # everything else
		    )
		    for args, expected in readline_cases:
		        self.assertEqual(limited.readline(*args), expected)

		def test_stream(self):
		    """read() on a fresh request returns the whole body."""
		    environ = {'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')}
		    request = WSGIRequest(environ)
		    body = request.read()
		    self.assertEqual(body, 'name=value')

		def test_read_after_value(self):
		    """
		    The request can still be read from after its contents were accessed
		    through POST or raw_post_data.
		    """
		    environ = {'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')}
		    request = WSGIRequest(environ)
		    self.assertEqual(request.POST, {u'name': [u'value']})
		    self.assertEqual(request.raw_post_data, 'name=value')
		    self.assertEqual(request.read(), 'name=value')

		def test_value_after_read(self):
		    """
		    Once the request has been read from, POST and raw_post_data can no
		    longer be constructed.
		    """
		    environ = {'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')}
		    request = WSGIRequest(environ)
		    self.assertEqual(request.read(2), 'na')
		    # Attribute access is deferred so assertRaises can catch the error.
		    self.assertRaises(Exception, getattr, request, 'raw_post_data')
		    self.assertEqual(request.POST, {})

		def test_read_by_lines(self):
		    """Iterating over a request yields its body line by line."""
		    environ = {'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')}
		    request = WSGIRequest(environ)
		    lines = list(request)
		    self.assertEqual(lines, ['name=value'])