Commit eb4f6de9 authored by Jon Dufresne, committed by Tim Graham
Browse files

Fixed #8149 -- Made File.__iter__() support universal newlines.

The following are recognized as ending a line: the Unix end-of-line
convention '\n', the Windows convention '\r\n', and the old
Macintosh convention '\r'.

http://www.python.org/dev/peps/pep-0278

Thanks tchaumeny for review.
parent eab3dc19
Loading
Loading
Loading
Loading
+34 −7
Original line number Diff line number Diff line
@@ -102,16 +102,22 @@ class File(FileProxyMixin):
        # Iterate over this file-like object by newlines
        buffer_ = None
        for chunk in self.chunks():
            chunk_buffer = BytesIO(chunk)

            for line in chunk_buffer:
            for line in chunk.splitlines(True):
                if buffer_:
                    if endswith_cr(buffer_) and not equals_lf(line):
                        # Line split after a \r newline; yield buffer_.
                        yield buffer_
                        # Continue with line.
                    else:
                        # Line either split without a newline (line
                        # continues after buffer_) or with \r\n
                        # newline (line == b'\n').
                        line = buffer_ + line
                    # buffer_ handled, clear it.
                    buffer_ = None

                # If this is the end of a line, yield
                # otherwise, wait for the next round
                if line[-1:] in (b'\n', b'\r'):
                # If this is the end of a \n or \r\n line, yield.
                if endswith_lf(line):
                    yield line
                else:
                    buffer_ = line
@@ -165,3 +171,24 @@ class ContentFile(File):

    def close(self):
        """No-op: closing performs no action and returns ``None``."""
        return None


def endswith_cr(line):
    """
    Tell whether ``line`` finishes with a carriage return.

    ``line`` may be a text or a byte string; the suffix it is compared
    against is picked to match that type.
    """
    if isinstance(line, six.text_type):
        carriage_return = '\r'
    else:
        carriage_return = b'\r'
    return line.endswith(carriage_return)


def endswith_lf(line):
    """
    Tell whether ``line`` finishes with a line feed.

    ``line`` may be a text or a byte string; the suffix it is compared
    against is picked to match that type.
    """
    if isinstance(line, six.text_type):
        line_feed = '\n'
    else:
        line_feed = b'\n'
    return line.endswith(line_feed)


def equals_lf(line):
    """
    Tell whether ``line`` is exactly one line feed and nothing else.

    ``line`` may be a text or a byte string; the value it is compared
    against is picked to match that type.
    """
    if isinstance(line, six.text_type):
        line_feed = '\n'
    else:
        line_feed = b'\n'
    return line == line_feed
+9 −0
Original line number Diff line number Diff line
@@ -53,6 +53,15 @@ The ``File`` Class

        Iterate over the file yielding one line at a time.

        .. versionchanged:: 1.8

            ``File`` now uses `universal newlines`_. The following are
            recognized as ending a line: the Unix end-of-line convention
            ``'\n'``, the Windows convention ``'\r\n'``, and the old Macintosh
            convention ``'\r'``.

            .. _universal newlines: http://www.python.org/dev/peps/pep-0278

    .. method:: chunks([chunk_size=None])

        Iterate over the file yielding "chunks" of a given size. ``chunk_size``
+9 −4
Original line number Diff line number Diff line
@@ -82,10 +82,15 @@ Here are some useful attributes of ``UploadedFile``:
        for line in uploadedfile:
            do_something_with(line)

    However, *unlike* standard Python files, :class:`UploadedFile` only
    understands ``\n`` (also known as "Unix-style") line endings. If you know
    that you need to handle uploaded files with different line endings, you'll
    need to do so in your view.
    Lines are split using `universal newlines`_. The following are recognized
    as ending a line: the Unix end-of-line convention ``'\n'``, the Windows
    convention ``'\r\n'``, and the old Macintosh convention ``'\r'``.

    .. _universal newlines: http://www.python.org/dev/peps/pep-0278

    .. versionchanged:: 1.8

        Previously, lines were split only on the Unix end-of-line ``'\n'``.

Subclasses of ``UploadedFile`` include:

+7 −0
Original line number Diff line number Diff line
@@ -659,6 +659,13 @@ Miscellaneous
* By default, :ref:`call_command <call-command>` now always skips the check
  framework (unless you pass it ``skip_checks=False``).

* When iterating over lines, :class:`~django.core.files.File` now uses
  `universal newlines`_. The following are recognized as ending a line: the
  Unix end-of-line convention ``'\n'``, the Windows convention ``'\r\n'``, and
  the old Macintosh convention ``'\r'``.

  .. _universal newlines: http://www.python.org/dev/peps/pep-0278

.. _deprecated-features-1.8:

Features deprecated in 1.8
+49 −1
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from io import BytesIO
from io import BytesIO, StringIO
import os
import gzip
import tempfile
@@ -72,6 +72,54 @@ class FileTests(unittest.TestCase):
        file = File(BytesIO(b'one\ntwo\nthree'))
        self.assertEqual(list(file), [b'one\n', b'two\n', b'three'])

    def test_file_iteration_windows_newlines(self):
        """
        #8149 - Iterating over a File whose content uses Windows-style
        (CRLF) line endings should yield one item per line.
        """
        expected_lines = [b'one\r\n', b'two\r\n', b'three']
        windows_file = File(BytesIO(b'one\r\ntwo\r\nthree'))
        self.assertEqual(list(windows_file), expected_lines)

    def test_file_iteration_mac_newlines(self):
        """
        #8149 - Iterating over a File whose content uses old Macintosh-style
        (CR only) line endings should yield one item per line.
        """
        expected_lines = [b'one\r', b'two\r', b'three']
        mac_file = File(BytesIO(b'one\rtwo\rthree'))
        self.assertEqual(list(mac_file), expected_lines)

    def test_file_iteration_mixed_newlines(self):
        """
        A File mixing CR, LF and CRLF terminators is split at each of them,
        and every yielded line keeps its own terminator.
        """
        expected_lines = [b'one\r', b'two\n', b'three\r\n', b'four']
        mixed_file = File(BytesIO(b'one\rtwo\nthree\r\nfour'))
        self.assertEqual(list(mixed_file), expected_lines)

    def test_file_iteration_with_unix_newline_at_chunk_boundary(self):
        """
        Lines are yielded correctly when a chunk ends right after an LF.
        """
        expected_lines = [b'one\n', b'two\n', b'three']
        unix_file = File(BytesIO(b'one\ntwo\nthree'))
        # A chunk size of 4 puts the first chunk boundary immediately
        # after the first LF:
        # b'one\n...
        #        ^
        unix_file.DEFAULT_CHUNK_SIZE = 4
        self.assertEqual(list(unix_file), expected_lines)

    def test_file_iteration_with_windows_newline_at_chunk_boundary(self):
        """
        Lines are yielded correctly when a chunk boundary falls between the
        CR and the LF of a CRLF terminator.
        """
        expected_lines = [b'one\r\n', b'two\r\n', b'three']
        windows_file = File(BytesIO(b'one\r\ntwo\r\nthree'))
        # A chunk size of 4 puts the first chunk boundary between the CR
        # and the LF of the first terminator:
        # b'one\r\n...
        #        ^
        windows_file.DEFAULT_CHUNK_SIZE = 4
        self.assertEqual(list(windows_file), expected_lines)

    def test_file_iteration_with_mac_newline_at_chunk_boundary(self):
        """
        Lines are yielded correctly when a chunk ends right after a lone CR.
        """
        expected_lines = [b'one\r', b'two\r', b'three']
        mac_file = File(BytesIO(b'one\rtwo\rthree'))
        # A chunk size of 4 puts the first chunk boundary immediately
        # after the first CR:
        # b'one\r...
        #        ^
        mac_file.DEFAULT_CHUNK_SIZE = 4
        self.assertEqual(list(mac_file), expected_lines)

    def test_file_iteration_with_text(self):
        """
        Iterating over a File that wraps a text stream yields text lines.
        """
        expected_lines = ['one\n', 'two\n', 'three']
        text_file = File(StringIO('one\ntwo\nthree'))
        self.assertEqual(list(text_file), expected_lines)


class NoNameFileTestCase(unittest.TestCase):
    """