Commit 1cd6e04c authored by Anssi Kääriäinen's avatar Anssi Kääriäinen
Browse files

Fixed #18676 -- Allow fast-path deletion of objects

Objects can be fast-path deleted if there are no signals, and there are
no further cascades. If fast-path is taken, the objects do not need to
be loaded into memory before deletion.

Thanks to Jeremy Dunck, Simon Charette and Alex Gaynor for reviewing
the patch.
parent 3fcca0e9
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -191,6 +191,13 @@ class NestedObjects(Collector):
            roots.extend(self._nested(root, seen, format_callback))
        return roots

    def can_fast_delete(self, *args, **kwargs):
        """
        We always want to load the objects into memory so that we can display
        them to the user in confirm page.
        """
        return False


def model_format_dict(obj):
    """
+57 −6
Original line number Diff line number Diff line
@@ -77,6 +77,9 @@ class Collector(object):
        self.data = {}
        self.batches = {} # {model: {field: set([instances])}}
        self.field_updates = {} # {model: {(field, value): set([instances])}}
        # fast_deletes is a list of queryset-likes that can be deleted without
        # fetching the objects into memory.
        self.fast_deletes = [] 

        # Tracks deletion-order dependency for databases without transactions
        # or ability to defer constraint checks. Only concrete model classes
@@ -131,6 +134,43 @@ class Collector(object):
            model, {}).setdefault(
            (field, value), set()).update(objs)

    def can_fast_delete(self, objs, from_field=None):
        """
        Determines if the objects in the given queryset-like can be
        fast-deleted. This can be done if there are no cascades, no
        parents and no signal listeners for the object class.

        The 'from_field' tells where we are coming from - we need this to
        determine if the objects are in fact to be deleted. Allows also
        skipping parent -> child -> parent chain preventing fast delete of
        the child.
        """
        if from_field and from_field.rel.on_delete is not CASCADE:
            return False
        if not (hasattr(objs, 'model') and hasattr(objs, '_raw_delete')):
            return False
        model = objs.model
        if (signals.pre_delete.has_listeners(model)
                or signals.post_delete.has_listeners(model)
                or signals.m2m_changed.has_listeners(model)):
            return False
        # The use of from_field comes from the need to avoid cascade back to
        # parent when parent delete is cascading to child.
        opts = model._meta
        if any(link != from_field for link in opts.concrete_model._meta.parents.values()):
            return False
        # Foreign keys pointing to this model, both from m2m and other
        # models.
        for related in opts.get_all_related_objects(
            include_hidden=True, include_proxy_eq=True):
            if related.field.rel.on_delete is not DO_NOTHING:
                return False
        # GFK deletes
        for relation in opts.many_to_many:
            if not relation.rel.through:
                return False
        return True

    def collect(self, objs, source=None, nullable=False, collect_related=True,
        source_attr=None, reverse_dependency=False):
        """
@@ -148,6 +188,9 @@ class Collector(object):
        models, the one case in which the cascade follows the forwards
        direction of an FK rather than the reverse direction.)
        """
        if self.can_fast_delete(objs):
            self.fast_deletes.append(objs)
            return
        new_objs = self.add(objs, source, nullable,
                            reverse_dependency=reverse_dependency)
        if not new_objs:
@@ -160,6 +203,10 @@ class Collector(object):
        concrete_model = model._meta.concrete_model
        for ptr in six.itervalues(concrete_model._meta.parents):
            if ptr:
                # FIXME: This seems to be buggy and execute a query for each
                # parent object fetch. We have the parent data in the obj,
                # but we don't have a nice way to turn that data into parent
                # object instance.
                parent_objs = [getattr(obj, ptr.name) for obj in new_objs]
                self.collect(parent_objs, source=model,
                             source_attr=ptr.rel.related_name,
@@ -170,12 +217,12 @@ class Collector(object):
            for related in model._meta.get_all_related_objects(
                    include_hidden=True, include_proxy_eq=True):
                field = related.field
                if related.model._meta.auto_created:
                    self.add_batch(related.model, field, new_objs)
                else:
                    sub_objs = self.related_objects(related, new_objs)
                    if not sub_objs:
                if field.rel.on_delete == DO_NOTHING:
                    continue
                sub_objs = self.related_objects(related, new_objs)
                if self.can_fast_delete(sub_objs, from_field=field):
                    self.fast_deletes.append(sub_objs)
                elif sub_objs:
                    field.rel.on_delete(self, field, sub_objs, self.using)

            # TODO This entire block is only needed as a special case to
@@ -241,6 +288,10 @@ class Collector(object):
                    sender=model, instance=obj, using=self.using
                )

        # fast deletes
        for qs in self.fast_deletes:
            qs._raw_delete(using=self.using)

        # update fields
        for model, instances_for_fieldvalues in six.iteritems(self.field_updates):
            query = sql.UpdateQuery(model)
+8 −0
Original line number Diff line number Diff line
@@ -529,6 +529,14 @@ class QuerySet(object):
        self._result_cache = None
    delete.alters_data = True

    def _raw_delete(self, using):
        """
        Deletes objects found from the given queryset in single direct SQL
        query. No signals are sent, and there is no protection for cascades.
        """
        sql.DeleteQuery(self.model).delete_qs(self, using)
    _raw_delete.alters_data = True

    def update(self, **kwargs):
        """
        Updates all elements in the current QuerySet, setting all the given
+2 −1
Original line number Diff line number Diff line
@@ -934,6 +934,7 @@ class SQLDeleteCompiler(SQLCompiler):
        qn = self.quote_name_unless_alias
        result = ['DELETE FROM %s' % qn(self.query.tables[0])]
        where, params = self.query.where.as_sql(qn=qn, connection=self.connection)
        if where:
            result.append('WHERE %s' % where)
        return ' '.join(result), tuple(params)

+32 −0
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@ Query subclasses which provide extra functionality beyond simple data retrieval.
"""

from django.core.exceptions import FieldError
from django.db import connections
from django.db.models.constants import LOOKUP_SEP
from django.db.models.fields import DateField, FieldDoesNotExist
from django.db.models.sql.constants import *
@@ -46,6 +47,37 @@ class DeleteQuery(Query):
                    pk_list[offset:offset + GET_ITERATOR_CHUNK_SIZE]), AND)
            self.do_query(self.model._meta.db_table, where, using=using)

    def delete_qs(self, query, using):
        innerq = query.query
        # Make sure the inner query has at least one table in use.
        innerq.get_initial_alias()
        # The same for our new query.
        self.get_initial_alias()
        innerq_used_tables = [t for t in innerq.tables
                              if innerq.alias_refcount[t]]
        if ((not innerq_used_tables or innerq_used_tables == self.tables)
            and not len(innerq.having)):
            # There is only the base table in use in the query, and there are
            # no aggregate filtering going on.
            self.where = innerq.where
        else:
            pk = query.model._meta.pk
            if not connections[using].features.update_can_self_select:
                # We can't do the delete using subquery.
                values = list(query.values_list('pk', flat=True))
                if not values:
                    return
                self.delete_batch(values, using)
                return
            else:
                values = innerq
                innerq.select = [(self.get_initial_alias(), pk.column)]
            where = self.where_class()
            where.add((Constraint(None, pk.column, pk), 'in', values), AND)
            self.where = where
        self.get_compiler(using).execute_sql(None)


class UpdateQuery(Query):
    """
    Represents an "update" SQL query.
Loading