Loading django/db/models/base.py +9 −10 Original line number Diff line number Diff line Loading @@ -10,7 +10,7 @@ from django.core import validators from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError from django.db.models.fields import AutoField, ImageField, FieldDoesNotExist from django.db.models.fields.related import OneToOneRel, ManyToOneRel, OneToOneField from django.db.models.query import delete_objects, Q from django.db.models.query import delete_objects, Q, CollectedObjects from django.db.models.options import Options, AdminOptions from django.db import connection, transaction from django.db.models import signals Loading Loading @@ -368,17 +368,16 @@ class Model(object): error_dict[f.name] = errors return error_dict def _collect_sub_objects(self, seen_objs): def _collect_sub_objects(self, seen_objs, parent=None, nullable=False): """ Recursively populates seen_objs with all objects related to this object. When done, seen_objs will be in the format: {model_class: {pk_val: obj, pk_val: obj, ...}, model_class: {pk_val: obj, pk_val: obj, ...}, ...} When done, seen_objs.items() will be in the format: [(model_class, {pk_val: obj, pk_val: obj, ...}), (model_class, {pk_val: obj, pk_val: obj, ...}),...] """ pk_val = self._get_pk_val() if pk_val in seen_objs.setdefault(self.__class__, {}): if seen_objs.add(self.__class__, pk_val, self, parent, nullable): return seen_objs[self.__class__][pk_val] = self for related in self._meta.get_all_related_objects(): rel_opts_name = related.get_accessor_name() Loading @@ -388,16 +387,16 @@ class Model(object): except ObjectDoesNotExist: pass else: sub_obj._collect_sub_objects(seen_objs) sub_obj._collect_sub_objects(seen_objs, self.__class__, related.field.null) else: for sub_obj in getattr(self, rel_opts_name).all(): sub_obj._collect_sub_objects(seen_objs) sub_obj._collect_sub_objects(seen_objs, self.__class__, related.field.null) def delete(self): assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname) # Find all the objects than need to be deleted seen_objs = SortedDict() seen_objs = CollectedObjects() self._collect_sub_objects(seen_objs) # Actually delete the objects Loading django/db/models/query.py +107 −11 Original line number Diff line number Diff line Loading @@ -16,6 +16,92 @@ ITER_CHUNK_SIZE = CHUNK_SIZE # Pull into this namespace for backwards compatibility EmptyResultSet = sql.EmptyResultSet class CyclicDependency(Exception): pass class CollectedObjects(object): """ A container that stores keys and lists of values along with remembering the parent objects for all the keys. This is used for the database object deletion routines so that we can calculate the 'leaf' objects which should be deleted first. """ def __init__(self): self.data = {} self.children = {} def add(self, model, pk, obj, parent_model, nullable=False): """ Adds an item. model is the class of the object being added, pk is the primary key, obj is the object itself, parent_model is the model of the parent object that this object was reached through, nullable should be True if this relation is nullable. If the item already existed in the structure, returns true, otherwise false. """ d = self.data.setdefault(model, SortedDict()) retval = pk in d d[pk] = obj # Nullable relationships can be ignored -- they # are nulled out before deleting, and therefore # do not affect the order in which objects have # to be deleted. if parent_model is not None and not nullable: self.children.setdefault(parent_model, []).append(model) return retval def __contains__(self, key): return self.data.__contains__(key) def __getitem__(self, key): return self.data[key] def __nonzero__(self): return bool(self.data) def iteritems(self): for k in self.ordered_keys(): yield k, self[k] def items(self): return list(self.iteritems()) def keys(self): return self.ordered_keys() def ordered_keys(self): """ Returns the models in the order that they should be dealth with i.e. models with no dependencies first. """ dealt_with = SortedDict() # Start with items that have no children models = self.data.keys() while len(dealt_with) < len(models): found = False for model in models: children = self.children.setdefault(model, []) if len([c for c in children if c not in dealt_with]) == 0: dealt_with[model] = None found = True if not found: raise CyclicDependency("There is a cyclic dependency of items to be processed.") return dealt_with.keys() def unordered_keys(self): """ Fallback for the case where is a cyclic dependency but we don't care. """ return self.data.keys() class QuerySet(object): "Represents a lazy database lookup for a set of objects" def __init__(self, model=None, query=None): Loading Loading @@ -275,7 +361,7 @@ class QuerySet(object): while 1: # Collect all the objects to be deleted in this chunk, and all the # objects that are related to the objects that are to be deleted. seen_objs = SortedDict() seen_objs = CollectedObjects() for object in del_query[:CHUNK_SIZE]: object._collect_sub_objects(seen_objs) Loading Loading @@ -682,19 +768,27 @@ def delete_objects(seen_objs): Iterate through a list of seen classes, and remove any instances that are referred to. """ try: ordered_classes = seen_objs.keys() ordered_classes.reverse() except CyclicDependency: # if there is a cyclic dependency, we cannot in general delete # the objects. However, if an appropriate transaction is set # up, or if the database is lax enough, it will succeed. # So for now, we go ahead and try anway. ordered_classes = seen_objs.unordered_keys() obj_pairs = {} for cls in ordered_classes: seen_objs[cls] = seen_objs[cls].items() seen_objs[cls].sort() items = seen_objs[cls].items() items.sort() obj_pairs[cls] = items # Pre notify all instances to be deleted for pk_val, instance in seen_objs[cls]: for pk_val, instance in items: dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance) pk_list = [pk for pk,instance in seen_objs[cls]] pk_list = [pk for pk,instance in items] del_query = sql.DeleteQuery(cls, connection) del_query.delete_batch_related(pk_list) Loading @@ -705,15 +799,17 @@ def delete_objects(seen_objs): # Now delete the actual data for cls in ordered_classes: seen_objs[cls].reverse() pk_list = [pk for pk,instance in seen_objs[cls]] items = obj_pairs[cls] items.reverse() pk_list = [pk for pk,instance in items] del_query = sql.DeleteQuery(cls, connection) del_query.delete_batch(pk_list) # Last cleanup; set NULLs where there once was a reference to the # object, NULL the primary key of the found objects, and perform # post-notification. for pk_val, instance in seen_objs[cls]: for pk_val, instance in items: for field in cls._meta.fields: if field.rel and field.null and field.rel.to in seen_objs: setattr(instance, field.attname, None) Loading tests/modeltests/delete/models.py +91 −14 Original line number Diff line number Diff line Loading @@ -33,8 +33,46 @@ class D(DefaultRepr, models.Model): # However, if we start at As, we might find Bs first (in which # case things will be nice), or find Ds first. # Some mutually dependent models, but nullable class E(DefaultRepr, models.Model): f = models.ForeignKey('F', null=True, related_name='e_rel') class F(DefaultRepr, models.Model): e = models.ForeignKey(E, related_name='f_rel') __test__ = {'API_TESTS': """ # First, some tests for the datastructure we use >>> from django.db.models.query import CollectedObjects >>> g = CollectedObjects() >>> g.add("key1", 1, "item1", None) False >>> g["key1"] {1: 'item1'} >>> g.add("key2", 1, "item1", "key1") False >>> g.add("key2", 2, "item2", "key1") False >>> g["key2"] {1: 'item1', 2: 'item2'} >>> g.add("key3", 1, "item1", "key1") False >>> g.add("key3", 1, "item1", "key2") True >>> g.ordered_keys() ['key3', 'key2', 'key1'] >>> g.add("key2", 1, "item1", "key3") True >>> g.ordered_keys() Traceback (most recent call last): ... CyclicDependency: There is a cyclic dependency of items to be processed. # Due to the way that transactions work in the test harness, # doing m.delete() here can work but fail in a real situation, # since it may delete all objects, but not in the right order. Loading @@ -42,11 +80,10 @@ __test__ = {'API_TESTS': """ # Also, it is possible that the order is correct 'accidentally', due # solely to order of imports etc. To check this, we set the order # that 'get_models()' will retrieve to a known 'tricky' order, and # then try again with the reverse and try again. Slightly naughty # access to internals here. # that 'get_models()' will retrieve to a known 'nice' order, and # then try again with a known 'tricky' order. Slightly naughty # access to internals here :-) >>> from django.utils.datastructures import SortedDict >>> from django.db.models.loading import cache # Nice order Loading @@ -56,8 +93,6 @@ __test__ = {'API_TESTS': """ >>> del C._meta._related_objects_cache >>> del D._meta._related_objects_cache >>> a1 = A() >>> a1.save() >>> b1 = B(a=a1) Loading @@ -67,9 +102,9 @@ __test__ = {'API_TESTS': """ >>> d1 = D(c=c1, a=a1) >>> d1.save() >>> sd = SortedDict() >>> a1._collect_sub_objects(sd) >>> list(reversed(sd.keys())) >>> o = CollectedObjects() >>> a1._collect_sub_objects(o) >>> o.keys() [<class 'modeltests.delete.models.D'>, <class 'modeltests.delete.models.C'>, <class 'modeltests.delete.models.B'>, <class 'modeltests.delete.models.A'>] >>> a1.delete() Loading @@ -80,7 +115,6 @@ __test__ = {'API_TESTS': """ >>> del C._meta._related_objects_cache >>> del D._meta._related_objects_cache >>> a2 = A() >>> a2.save() >>> b2 = B(a=a2) Loading @@ -90,13 +124,56 @@ __test__ = {'API_TESTS': """ >>> d2 = D(c=c2, a=a2) >>> d2.save() >>> sd2 = SortedDict() >>> a2._collect_sub_objects(sd2) >>> list(reversed(sd2.keys())) >>> o = CollectedObjects() >>> a2._collect_sub_objects(o) >>> o.keys() [<class 'modeltests.delete.models.D'>, <class 'modeltests.delete.models.C'>, <class 'modeltests.delete.models.B'>, <class 'modeltests.delete.models.A'>] >>> a2.delete() # Tests for nullable related fields >>> g = CollectedObjects() >>> g.add("key1", 1, "item1", None) False >>> g.add("key2", 1, "item1", "key1", nullable=True) False >>> g.add("key1", 1, "item1", "key2") True >>> g.ordered_keys() ['key1', 'key2'] >>> e1 = E() >>> e1.save() >>> f1 = F(e=e1) >>> f1.save() >>> e1.f = f1 >>> e1.save() # Since E.f is nullable, we should delete F first (after nulling out # the E.f field), then E. >>> o = CollectedObjects() >>> e1._collect_sub_objects(o) >>> o.keys() [<class 'modeltests.delete.models.F'>, <class 'modeltests.delete.models.E'>] >>> e1.delete() >>> e2 = E() >>> e2.save() >>> f2 = F(e=e2) >>> f2.save() >>> e2.f = f2 >>> e2.save() # Same deal as before, though we are starting from the other object. >>> o = CollectedObjects() >>> f2._collect_sub_objects(o) >>> o.keys() [<class 'modeltests.delete.models.F'>, <class 'modeltests.delete.models.E'>] >>> f2.delete() """ } Loading
django/db/models/base.py +9 −10 Original line number Diff line number Diff line Loading @@ -10,7 +10,7 @@ from django.core import validators from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError from django.db.models.fields import AutoField, ImageField, FieldDoesNotExist from django.db.models.fields.related import OneToOneRel, ManyToOneRel, OneToOneField from django.db.models.query import delete_objects, Q from django.db.models.query import delete_objects, Q, CollectedObjects from django.db.models.options import Options, AdminOptions from django.db import connection, transaction from django.db.models import signals Loading Loading @@ -368,17 +368,16 @@ class Model(object): error_dict[f.name] = errors return error_dict def _collect_sub_objects(self, seen_objs): def _collect_sub_objects(self, seen_objs, parent=None, nullable=False): """ Recursively populates seen_objs with all objects related to this object. When done, seen_objs will be in the format: {model_class: {pk_val: obj, pk_val: obj, ...}, model_class: {pk_val: obj, pk_val: obj, ...}, ...} When done, seen_objs.items() will be in the format: [(model_class, {pk_val: obj, pk_val: obj, ...}), (model_class, {pk_val: obj, pk_val: obj, ...}),...] """ pk_val = self._get_pk_val() if pk_val in seen_objs.setdefault(self.__class__, {}): if seen_objs.add(self.__class__, pk_val, self, parent, nullable): return seen_objs[self.__class__][pk_val] = self for related in self._meta.get_all_related_objects(): rel_opts_name = related.get_accessor_name() Loading @@ -388,16 +387,16 @@ class Model(object): except ObjectDoesNotExist: pass else: sub_obj._collect_sub_objects(seen_objs) sub_obj._collect_sub_objects(seen_objs, self.__class__, related.field.null) else: for sub_obj in getattr(self, rel_opts_name).all(): sub_obj._collect_sub_objects(seen_objs) sub_obj._collect_sub_objects(seen_objs, self.__class__, related.field.null) def delete(self): assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname) # Find all the objects than need to be deleted seen_objs = SortedDict() seen_objs = CollectedObjects() self._collect_sub_objects(seen_objs) # Actually delete the objects Loading
django/db/models/query.py +107 −11 Original line number Diff line number Diff line Loading @@ -16,6 +16,92 @@ ITER_CHUNK_SIZE = CHUNK_SIZE # Pull into this namespace for backwards compatibility EmptyResultSet = sql.EmptyResultSet class CyclicDependency(Exception): pass class CollectedObjects(object): """ A container that stores keys and lists of values along with remembering the parent objects for all the keys. This is used for the database object deletion routines so that we can calculate the 'leaf' objects which should be deleted first. """ def __init__(self): self.data = {} self.children = {} def add(self, model, pk, obj, parent_model, nullable=False): """ Adds an item. model is the class of the object being added, pk is the primary key, obj is the object itself, parent_model is the model of the parent object that this object was reached through, nullable should be True if this relation is nullable. If the item already existed in the structure, returns true, otherwise false. """ d = self.data.setdefault(model, SortedDict()) retval = pk in d d[pk] = obj # Nullable relationships can be ignored -- they # are nulled out before deleting, and therefore # do not affect the order in which objects have # to be deleted. if parent_model is not None and not nullable: self.children.setdefault(parent_model, []).append(model) return retval def __contains__(self, key): return self.data.__contains__(key) def __getitem__(self, key): return self.data[key] def __nonzero__(self): return bool(self.data) def iteritems(self): for k in self.ordered_keys(): yield k, self[k] def items(self): return list(self.iteritems()) def keys(self): return self.ordered_keys() def ordered_keys(self): """ Returns the models in the order that they should be dealth with i.e. models with no dependencies first. """ dealt_with = SortedDict() # Start with items that have no children models = self.data.keys() while len(dealt_with) < len(models): found = False for model in models: children = self.children.setdefault(model, []) if len([c for c in children if c not in dealt_with]) == 0: dealt_with[model] = None found = True if not found: raise CyclicDependency("There is a cyclic dependency of items to be processed.") return dealt_with.keys() def unordered_keys(self): """ Fallback for the case where is a cyclic dependency but we don't care. """ return self.data.keys() class QuerySet(object): "Represents a lazy database lookup for a set of objects" def __init__(self, model=None, query=None): Loading Loading @@ -275,7 +361,7 @@ class QuerySet(object): while 1: # Collect all the objects to be deleted in this chunk, and all the # objects that are related to the objects that are to be deleted. seen_objs = SortedDict() seen_objs = CollectedObjects() for object in del_query[:CHUNK_SIZE]: object._collect_sub_objects(seen_objs) Loading Loading @@ -682,19 +768,27 @@ def delete_objects(seen_objs): Iterate through a list of seen classes, and remove any instances that are referred to. """ try: ordered_classes = seen_objs.keys() ordered_classes.reverse() except CyclicDependency: # if there is a cyclic dependency, we cannot in general delete # the objects. However, if an appropriate transaction is set # up, or if the database is lax enough, it will succeed. # So for now, we go ahead and try anway. ordered_classes = seen_objs.unordered_keys() obj_pairs = {} for cls in ordered_classes: seen_objs[cls] = seen_objs[cls].items() seen_objs[cls].sort() items = seen_objs[cls].items() items.sort() obj_pairs[cls] = items # Pre notify all instances to be deleted for pk_val, instance in seen_objs[cls]: for pk_val, instance in items: dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance) pk_list = [pk for pk,instance in seen_objs[cls]] pk_list = [pk for pk,instance in items] del_query = sql.DeleteQuery(cls, connection) del_query.delete_batch_related(pk_list) Loading @@ -705,15 +799,17 @@ def delete_objects(seen_objs): # Now delete the actual data for cls in ordered_classes: seen_objs[cls].reverse() pk_list = [pk for pk,instance in seen_objs[cls]] items = obj_pairs[cls] items.reverse() pk_list = [pk for pk,instance in items] del_query = sql.DeleteQuery(cls, connection) del_query.delete_batch(pk_list) # Last cleanup; set NULLs where there once was a reference to the # object, NULL the primary key of the found objects, and perform # post-notification. for pk_val, instance in seen_objs[cls]: for pk_val, instance in items: for field in cls._meta.fields: if field.rel and field.null and field.rel.to in seen_objs: setattr(instance, field.attname, None) Loading
tests/modeltests/delete/models.py +91 −14 Original line number Diff line number Diff line Loading @@ -33,8 +33,46 @@ class D(DefaultRepr, models.Model): # However, if we start at As, we might find Bs first (in which # case things will be nice), or find Ds first. # Some mutually dependent models, but nullable class E(DefaultRepr, models.Model): f = models.ForeignKey('F', null=True, related_name='e_rel') class F(DefaultRepr, models.Model): e = models.ForeignKey(E, related_name='f_rel') __test__ = {'API_TESTS': """ # First, some tests for the datastructure we use >>> from django.db.models.query import CollectedObjects >>> g = CollectedObjects() >>> g.add("key1", 1, "item1", None) False >>> g["key1"] {1: 'item1'} >>> g.add("key2", 1, "item1", "key1") False >>> g.add("key2", 2, "item2", "key1") False >>> g["key2"] {1: 'item1', 2: 'item2'} >>> g.add("key3", 1, "item1", "key1") False >>> g.add("key3", 1, "item1", "key2") True >>> g.ordered_keys() ['key3', 'key2', 'key1'] >>> g.add("key2", 1, "item1", "key3") True >>> g.ordered_keys() Traceback (most recent call last): ... CyclicDependency: There is a cyclic dependency of items to be processed. # Due to the way that transactions work in the test harness, # doing m.delete() here can work but fail in a real situation, # since it may delete all objects, but not in the right order. Loading @@ -42,11 +80,10 @@ __test__ = {'API_TESTS': """ # Also, it is possible that the order is correct 'accidentally', due # solely to order of imports etc. To check this, we set the order # that 'get_models()' will retrieve to a known 'tricky' order, and # then try again with the reverse and try again. Slightly naughty # access to internals here. # that 'get_models()' will retrieve to a known 'nice' order, and # then try again with a known 'tricky' order. Slightly naughty # access to internals here :-) >>> from django.utils.datastructures import SortedDict >>> from django.db.models.loading import cache # Nice order Loading @@ -56,8 +93,6 @@ __test__ = {'API_TESTS': """ >>> del C._meta._related_objects_cache >>> del D._meta._related_objects_cache >>> a1 = A() >>> a1.save() >>> b1 = B(a=a1) Loading @@ -67,9 +102,9 @@ __test__ = {'API_TESTS': """ >>> d1 = D(c=c1, a=a1) >>> d1.save() >>> sd = SortedDict() >>> a1._collect_sub_objects(sd) >>> list(reversed(sd.keys())) >>> o = CollectedObjects() >>> a1._collect_sub_objects(o) >>> o.keys() [<class 'modeltests.delete.models.D'>, <class 'modeltests.delete.models.C'>, <class 'modeltests.delete.models.B'>, <class 'modeltests.delete.models.A'>] >>> a1.delete() Loading @@ -80,7 +115,6 @@ __test__ = {'API_TESTS': """ >>> del C._meta._related_objects_cache >>> del D._meta._related_objects_cache >>> a2 = A() >>> a2.save() >>> b2 = B(a=a2) Loading @@ -90,13 +124,56 @@ __test__ = {'API_TESTS': """ >>> d2 = D(c=c2, a=a2) >>> d2.save() >>> sd2 = SortedDict() >>> a2._collect_sub_objects(sd2) >>> list(reversed(sd2.keys())) >>> o = CollectedObjects() >>> a2._collect_sub_objects(o) >>> o.keys() [<class 'modeltests.delete.models.D'>, <class 'modeltests.delete.models.C'>, <class 'modeltests.delete.models.B'>, <class 'modeltests.delete.models.A'>] >>> a2.delete() # Tests for nullable related fields >>> g = CollectedObjects() >>> g.add("key1", 1, "item1", None) False >>> g.add("key2", 1, "item1", "key1", nullable=True) False >>> g.add("key1", 1, "item1", "key2") True >>> g.ordered_keys() ['key1', 'key2'] >>> e1 = E() >>> e1.save() >>> f1 = F(e=e1) >>> f1.save() >>> e1.f = f1 >>> e1.save() # Since E.f is nullable, we should delete F first (after nulling out # the E.f field), then E. >>> o = CollectedObjects() >>> e1._collect_sub_objects(o) >>> o.keys() [<class 'modeltests.delete.models.F'>, <class 'modeltests.delete.models.E'>] >>> e1.delete() >>> e2 = E() >>> e2.save() >>> f2 = F(e=e2) >>> f2.save() >>> e2.f = f2 >>> e2.save() # Same deal as before, though we are starting from the other object. >>> o = CollectedObjects() >>> f2._collect_sub_objects(o) >>> o.keys() [<class 'modeltests.delete.models.F'>, <class 'modeltests.delete.models.E'>] >>> f2.delete() """ }