Commit 2a55301f, authored by Matthew Somerville, committed by Tim Graham
Browse files

[1.8.x] Fixed #24242 -- Improved efficiency of utils.text.compress_sequence()

The function no longer flushes zfile after each write, as doing so can
lead to the gzipped streamed content being larger than the original
content: each flush adds a 5- or 6-byte type 0 (stored) block. Without
the flush, buf.read() may return nothing, so a chunk is only yielded when
it actually contains data. Testing shows that, without the flush(), the
buffer is still flushed every 17k or so and the output compresses as well
as if the content had been compressed as a whole string.

Backport of caa3562d from master
parent d585ade0
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -304,6 +304,8 @@ class StreamingBuffer(object):
        self.vals.append(val)

    def read(self):
        """Return all buffered bytes as one string and empty the buffer."""
        if self.vals:
            # Hand back everything queued so far, then start a fresh list.
            joined = b''.join(self.vals)
            self.vals = []
            return joined
        return b''
@@ -323,8 +325,9 @@ def compress_sequence(sequence):
    yield buf.read()
    for item in sequence:
        zfile.write(item)
        zfile.flush()
        yield buf.read()
        data = buf.read()
        if data:
            yield data
    zfile.close()
    yield buf.read()

+11 −1
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from unittest import skipUnless
import json
import warnings
from unittest import skipUnless

from django.test import SimpleTestCase, ignore_warnings
from django.test.utils import reset_warning_registry
@@ -198,6 +199,15 @@ class TestUtilsText(SimpleTestCase):
        filename = "^&'@{}[],$=!-#()%+~_123.txt"
        self.assertEqual(text.get_valid_filename(filename), "-_123.txt")

    def test_compress_sequence(self):
        """
        Streaming a JSON payload through compress_sequence() must produce
        fewer bytes in total than the uncompressed payload.
        """
        data = [{'key': i} for i in range(10)]
        # iterencode() yields the document as a sequence of text fragments;
        # encode each one so the compressor receives bytes.
        seq = [s.encode('utf-8') for s in json.JSONEncoder().iterencode(data)]
        actual_length = len(b''.join(seq))
        out = text.compress_sequence(seq)
        compressed_length = len(b''.join(out))
        # assertLess reports both lengths on failure, unlike assertTrue.
        self.assertLess(compressed_length, actual_length)

    @ignore_warnings(category=RemovedInDjango19Warning)
    def test_javascript_quote(self):
        input = "<script>alert('Hello \\xff.\n Welcome\there\r');</script>"