Commit 3b5083be authored by Claude Paroz
Browse files

Fixed #5423 -- Made dumpdata output one row at a time.

This should prevent storing all rows in memory when big sets of
data are dumped.
See ticket for heroic contributors.
parent c2139bbc
Loading
Loading
Loading
Loading
+7 −7
Original line number Diff line number Diff line
@@ -49,23 +49,23 @@ class OutputWrapper(object):
    """
    Wrapper around stdout/stderr
    """
    def __init__(self, out, style_func=None):
    def __init__(self, out, style_func=None, ending='\n'):
        self._out = out
        self.style_func = None
        if hasattr(out, 'isatty') and out.isatty():
            self.style_func = style_func
        self.ending = ending

    def __getattr__(self, name):
        """
        Forward lookups of attributes not defined on the wrapper itself to
        the wrapped stream (self._out).
        """
        # Python only calls __getattr__ after normal attribute lookup fails,
        # so the wrapper's own attributes and methods are never shadowed.
        return getattr(self._out, name)

    def write(self, msg, style_func=None, ending='\n'):
    def write(self, msg, style_func=None, ending=None):
        ending = ending is None and self.ending or ending
        if ending and not msg.endswith(ending):
            msg += ending
        if style_func is not None:
            msg = style_func(msg)
        elif self.style_func is not None:
            msg = self.style_func(msg)
        self._out.write(smart_str(msg))
        style_func = [f for f in (style_func, self.style_func, lambda x:x)
                      if f is not None][0]
        self._out.write(smart_str(style_func(msg)))


class BaseCommand(object):
+17 −13
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@ from django.core import serializers
from django.db import router, DEFAULT_DB_ALIAS
from django.utils.datastructures import SortedDict

import sys
from optparse import make_option

class Command(BaseCommand):
@@ -97,21 +98,24 @@ class Command(BaseCommand):
        except KeyError:
            raise CommandError("Unknown serialization format: %s" % format)

        # Now collate the objects to be serialized.
        objects = []
        def get_objects():
            # Collate the objects to be serialized.
            for model in sort_dependencies(app_list.items()):
                if model in excluded_models:
                    continue
                if not model._meta.proxy and router.allow_syncdb(using, model):
                    if use_base_manager:
                    objects.extend(model._base_manager.using(using).all())
                        objects = model._base_manager
                    else:
                    objects.extend(model._default_manager.using(using).all())
                        objects = model._default_manager
                    for obj in objects.using(using).\
                            order_by(model._meta.pk.name).iterator():
                        yield obj

        try:
            self.stdout.write(serializers.serialize(format, objects,
                              indent=indent, use_natural_keys=use_natural_keys),
                              ending='')
            self.stdout.ending = None
            serializers.serialize(format, get_objects(), indent=indent,
                    use_natural_keys=use_natural_keys, stream=self.stdout)
        except Exception as e:
            if show_traceback:
                raise
+3 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ class Serializer(object):
        self.use_natural_keys = options.pop("use_natural_keys", False)

        self.start_serialization()
        self.first = True
        for obj in queryset:
            self.start_object(obj)
            # Use the concrete parent class' _meta instead of the object's _meta
@@ -57,6 +58,8 @@ class Serializer(object):
                    if self.selected_fields is None or field.attname in self.selected_fields:
                        self.handle_m2m_field(obj, field)
            self.end_object(obj)
            if self.first:
                self.first = False
        self.end_serialization()
        return self.getvalue()

+27 −2
Original line number Diff line number Diff line
@@ -21,13 +21,38 @@ class Serializer(PythonSerializer):
    """
    internal_use_only = False

    def end_serialization(self):
    def start_serialization(self):
        if json.__version__.split('.') >= ['2', '1', '3']:
            # Use JS strings to represent Python Decimal instances (ticket #16850)
            self.options.update({'use_decimal': False})
        json.dump(self.objects, self.stream, cls=DjangoJSONEncoder, **self.options)
        self._current = None
        self.json_kwargs = self.options.copy()
        self.json_kwargs.pop('stream', None)
        self.json_kwargs.pop('fields', None)
        self.stream.write("[")

    def end_serialization(self):
        """
        Write the closing "]" of the JSON array to the stream. When the
        "indent" option is truthy, the bracket is preceded and followed by
        a newline.
        """
        if self.options.get("indent"):
            pieces = ("\n", "]", "\n")
        else:
            pieces = ("]",)
        # One write() per piece, as pieces are emitted incrementally.
        for piece in pieces:
            self.stream.write(piece)

    def end_object(self, obj):
        """
        Dump one finished object straight to the stream instead of
        collecting it; self._current holds its field data at this point.
        """
        pretty = self.options.get("indent")
        # Work out what has to precede this object: nothing (or a newline
        # when indenting) for the first one, otherwise a comma followed by
        # a newline (indented output) or a single space (compact output).
        if self.first:
            lead_in = ("\n",) if pretty else ()
        elif pretty:
            lead_in = (",", "\n")
        else:
            lead_in = (",", " ")
        for piece in lead_in:
            self.stream.write(piece)
        payload = self.get_dump_object(obj)
        json.dump(payload, self.stream, cls=DjangoJSONEncoder,
                  **self.json_kwargs)
        # Reset the per-object field buffer.
        self._current = None

    def getvalue(self):
        # Deliberately bypass PythonSerializer.getvalue() in favour of the
        # base Serializer behaviour: return the stream's contents when the
        # stream offers a callable getvalue(), otherwise return None.
        reader = getattr(self.stream, 'getvalue', None)
        if not callable(reader):
            return None
        return reader()

+8 −5
Original line number Diff line number Diff line
@@ -27,12 +27,15 @@ class Serializer(base.Serializer):
        self._current = {}

    def end_object(self, obj):
        self.objects.append({
            "model"  : smart_unicode(obj._meta),
        self.objects.append(self.get_dump_object(obj))
        self._current = None

    def get_dump_object(self, obj):
        return {
            "pk": smart_unicode(obj._get_pk_val(), strings_only=True),
            "model": smart_unicode(obj._meta),
            "fields": self._current
        })
        self._current = None
        }

    def handle_field(self, obj, field):
        value = field._get_val_from_obj(obj)
Loading