Fixed #5420 -- Added support for delayed loading of model fields. (29050ef9) · Commits · Dom Sekotill / django

django/db/models/base.py

+44 −1

Original line number	Diff line number	Diff line
		@@ -12,7 +12,8 @@ import django.db.models.manager # Imported to register signal handler.
		from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError
		from django.db.models.fields import AutoField, FieldDoesNotExist
		from django.db.models.fields.related import OneToOneRel, ManyToOneRel, OneToOneField
		from django.db.models.query import delete_objects, Q, CollectedObjects
		from django.db.models.query import delete_objects, Q
		from django.db.models.query_utils import CollectedObjects, DeferredAttribute
		from django.db.models.options import Options
		from django.db import connection, transaction, DatabaseError
		from django.db.models import signals
		@@ -235,6 +236,7 @@ class ModelBase(type):

		class Model(object):
		__metaclass__ = ModelBase
		_deferred = False

		def __init__(self, args, *kwargs):
		signals.pre_init.send(sender=self.__class__, args=args, kwargs=kwargs)
		@@ -271,6 +273,13 @@ class Model(object):
		for field in fields_iter:
		is_related_object = False
		if kwargs:
		# This slightly odd construct is so that we can access any
		# data-descriptor object (DeferredAttribute) without triggering
		# its __get__ method.
		if (field.attname not in kwargs and
		isinstance(self.__class__.__dict__.get(field.attname), DeferredAttribute)):
		# This field will be populated on request.
		continue
		if isinstance(field.rel, ManyToOneRel):
		try:
		# Assume object instance was passed in.
		@@ -332,6 +341,31 @@ class Model(object):
		def __hash__(self):
		    """Hash the instance by the value returned from ``_get_pk_val()``."""
		    pk_value = self._get_pk_val()
		    return hash(pk_value)

		def __reduce__(self):
		    """
		    Provide pickling support.

		    Instances whose ``_deferred`` flag is false simply use the inherited
		    ``__reduce__``. For deferred instances the class was created
		    dynamically, and only module-level classes can be pickled by the
		    default path, so the instance is described manually: a call to
		    model_unpickle() with the model, the pk value and the list of deferred
		    attnames, plus the instance ``__dict__`` as state.
		    """
		    if not self._deferred:
		        return super(Model, self).__reduce__()
		    state = self.__dict__
		    class_attrs = self.__class__.__dict__
		    deferred_names = []
		    pk_val = None
		    for field in self._meta.fields:
		        # Look in the class __dict__ directly so the descriptor itself is
		        # examined rather than its __get__ being triggered.
		        descriptor = class_attrs.get(field.attname)
		        if not isinstance(descriptor, DeferredAttribute):
		            continue
		        deferred_names.append(field.attname)
		        if pk_val is None:
		            # The pk value and model are the same for every
		            # DeferredAttribute on this class, so reading them from the
		            # first one found is enough.
		            pk_val = descriptor.pk_value
		            model = descriptor.model_ref()
		    return (model_unpickle, (model, pk_val, deferred_names), state)

		def _get_pk_val(self, meta=None):
		if not meta:
		meta = self._meta
		@@ -591,6 +625,15 @@ def get_absolute_url(opts, func, self, args, *kwargs):
		class Empty(object):
		    """Bare placeholder class that defines no attributes of its own."""

		def model_unpickle(model, pk_val, attrs):
		    """
		    Used to unpickle Model subclasses with deferred fields.

		    Rebuilds the deferred class for ``model`` via deferred_class_factory()
		    using ``pk_val`` and the deferred attribute names in ``attrs``, and
		    returns a new, uninitialised instance of it (created with ``__new__``,
		    so ``__init__`` is not run).
		    """
		    # Imported here rather than at module level, as in the original code.
		    from django.db.models.query_utils import deferred_class_factory
		    cls = deferred_class_factory(model, pk_val, attrs)
		    return cls.__new__(cls)
		# The marker attribute pickle knows about is "__safe_for_unpickling__";
		# the previous spelling ("__safe_for_unpickle__") was a typo that nothing
		# ever reads.
		model_unpickle.__safe_for_unpickling__ = True

		if sys.version_info < (2, 5):
		# Prior to Python 2.5, Exception was an old-style class
		def subclass_exception(name, parent, unused):

django/db/models/manager.py

+6 −0

Original line number	Diff line number	Diff line
		@@ -167,6 +167,12 @@ class Manager(object):
		def reverse(self, *args, **kwargs):
		    """
		    Proxy to ``reverse()`` on the queryset returned by get_query_set().

		    All positional and keyword arguments are forwarded unchanged.
		    """
		    return self.get_query_set().reverse(*args, **kwargs)

		def defer(self, *args, **kwargs):
		    """
		    Proxy to ``defer()`` on the queryset returned by get_query_set().

		    All positional and keyword arguments are forwarded unchanged.
		    """
		    return self.get_query_set().defer(*args, **kwargs)

		def only(self, *args, **kwargs):
		    """
		    Proxy to ``only()`` on the queryset returned by get_query_set().

		    All positional and keyword arguments are forwarded unchanged.
		    """
		    return self.get_query_set().only(*args, **kwargs)

		def _insert(self, values, **kwargs):
		    """
		    Call insert_query() for this manager's model with ``values``,
		    passing any keyword arguments through, and return its result.
		    """
		    model = self.model
		    return insert_query(model, values, **kwargs)

django/db/models/options.py

+6 −0

Original line number	Diff line number	Diff line
		@@ -477,3 +477,9 @@ class Options(object):
		self._ordered_objects = objects
		return self._ordered_objects

		def pk_index(self):
		    """
		    Return the position of the primary key field (``self.pk``) within the
		    ``self.fields`` list.
		    """
		    all_fields = self.fields
		    return all_fields.index(self.pk)

django/db/models/query.py

+87 −106

Original line number	Diff line number	Diff line
		"""
		The main QuerySet implementation. This provides the public API for the ORM.
		"""

		try:
		set
		except NameError:
		@@ -6,9 +10,8 @@ except NameError:
		from django.db import connection, transaction, IntegrityError
		from django.db.models.aggregates import Aggregate
		from django.db.models.fields import DateField
		from django.db.models.query_utils import Q, select_related_descend
		from django.db.models.query_utils import Q, select_related_descend, CollectedObjects, CyclicDependency, deferred_class_factory
		from django.db.models import signals, sql
		from django.utils.datastructures import SortedDict


		# Used to control how many objects are worked with at once in some cases (e.g.
		@@ -22,102 +25,6 @@ REPR_OUTPUT_SIZE = 20
		# Pull into this namespace for backwards compatibility.
		EmptyResultSet = sql.EmptyResultSet


		class CyclicDependency(Exception):
		    """
		    Error raised when a collection of objects being processed has a cyclic
		    dependency, e.g. when deleting multiple objects.
		    """


		class CollectedObjects(object):
		    """
		    A container that stores keys and lists of values along with remembering the
		    parent objects for all the keys.

		    This is used for the database object deletion routines so that we can
		    calculate the 'leaf' objects which should be deleted first.
		    """

		    def __init__(self):
		        # Maps model -> SortedDict of pk -> object (filled in by add()).
		        self.data = {}
		        # Maps parent model -> list of models that were reached through it.
		        self.children = {}

		    def add(self, model, pk, obj, parent_model, nullable=False):
		        """
		        Adds an item to the container.

		        Arguments:
		        * model - the class of the object being added.
		        * pk - the primary key.
		        * obj - the object itself.
		        * parent_model - the model of the parent object that this object was
		          reached through.
		        * nullable - should be True if this relation is nullable.

		        Returns True if the item already existed in the structure and
		        False otherwise.
		        """
		        d = self.data.setdefault(model, SortedDict())
		        retval = pk in d
		        d[pk] = obj
		        # Nullable relationships can be ignored -- they are nulled out before
		        # deleting, and therefore do not affect the order in which objects
		        # have to be deleted.
		        if parent_model is not None and not nullable:
		            self.children.setdefault(parent_model, []).append(model)
		        return retval

		    def __contains__(self, key):
		        return self.data.__contains__(key)

		    def __getitem__(self, key):
		        return self.data[key]

		    def __nonzero__(self):
		        return bool(self.data)

		    # Python 3 looks up __bool__ instead of __nonzero__; without this alias
		    # an empty container would always be truthy there.
		    __bool__ = __nonzero__

		    def iteritems(self):
		        for k in self.ordered_keys():
		            yield k, self[k]

		    def items(self):
		        return list(self.iteritems())

		    def keys(self):
		        return self.ordered_keys()

		    def ordered_keys(self):
		        """
		        Returns the models in the order that they should be dealt with (i.e.
		        models with no dependencies first).

		        Raises CyclicDependency if a full pass over the remaining models
		        finds none whose children have all been dealt with.
		        """
		        dealt_with = SortedDict()
		        # Start with items that have no children
		        models = self.data.keys()
		        while len(dealt_with) < len(models):
		            found = False
		            for model in models:
		                if model in dealt_with:
		                    continue
		                children = self.children.setdefault(model, [])
		                if len([c for c in children if c not in dealt_with]) == 0:
		                    dealt_with[model] = None
		                    found = True
		            if not found:
		                raise CyclicDependency(
		                    "There is a cyclic dependency of items to be processed.")

		        return dealt_with.keys()

		    def unordered_keys(self):
		        """
		        Fallback for the case where there is a cyclic dependency but we don't
		        care.
		        """
		        return self.data.keys()


		class QuerySet(object):
		"""
		Represents a lazy database lookup for a set of objects.
		@@ -275,6 +182,11 @@ class QuerySet(object):
		extra_select = self.query.extra_select.keys()
		aggregate_select = self.query.aggregate_select.keys()

		only_load = self.query.get_loaded_field_names()
		if not fill_cache:
		fields = self.model._meta.fields
		pk_idx = self.model._meta.pk_index()

		index_start = len(extra_select)
		aggregate_start = index_start + len(self.model._meta.fields)

		@@ -282,9 +194,30 @@ class QuerySet(object):
		if fill_cache:
		obj, _ = get_cached_row(self.model, row,
		index_start, max_depth,
		requested=requested, offset=len(aggregate_select))
		requested=requested, offset=len(aggregate_select),
		only_load=only_load)
		else:
		load_fields = only_load.get(self.model)
		if load_fields:
		# Some fields have been deferred, so we have to initialise
		# via keyword arguments.
		row_data = row[index_start:aggregate_start]
		pk_val = row_data[pk_idx]
		skip = set()
		init_list = []
		for field in fields:
		if field.name not in load_fields:
		skip.add(field.attname)
		else:
		init_list.append(field.attname)
		if skip:
		model_cls = deferred_class_factory(self.model, pk_val,
		skip)
		obj = model_cls(**dict(zip(init_list, row_data)))
		else:
		# omit aggregates in object creation
		obj = self.model(*row[index_start:aggregate_start])
		else:
		# Omit aggregates in object creation.
		obj = self.model(*row[index_start:aggregate_start])

		for i, k in enumerate(extra_select):
		@@ -655,6 +588,35 @@ class QuerySet(object):
		clone.query.standard_ordering = not clone.query.standard_ordering
		return clone

		def defer(self, *fields):
		    """
		    Return a clone of this queryset with loading of the given fields
		    deferred until they are accessed.

		    The fields are added to any set of deferred fields already on the
		    query. The one exception: passing None as the only argument removes all
		    deferrals (None acts as a reset option).
		    """
		    clone = self._clone()
		    query = clone.query
		    if fields != (None,):
		        query.add_deferred_loading(fields)
		    else:
		        query.clear_deferred_loading()
		    return clone

		def only(self, *fields):
		    """
		    Essentially, the opposite of defer. Only the fields passed into this
		    method and that are not already specified as deferred are loaded
		    immediately when the queryset is evaluated.

		    Returns a clone of this queryset. Raises TypeError if None is passed
		    as the only argument.
		    """
		    # "fields" is always a tuple (it comes from *fields), so it has to be
		    # compared against a tuple; comparing with the list [None] never matched.
		    if fields == (None,):
		        # Can only pass None to defer(), not only(), as the reset option.
		        # That won't stop people trying to do this, so let's be explicit.
		        raise TypeError("Cannot pass None as an argument to only().")
		    clone = self._clone()
		    clone.query.add_immediate_loading(fields)
		    return clone

		###################
		# PRIVATE METHODS #
		###################
		@@ -757,6 +719,7 @@ class ValuesQuerySet(QuerySet):
		Called by the _clone() method after initializing the rest of the
		instance.
		"""
		self.query.clear_deferred_loading()
		self.query.clear_select_fields()

		if self._fields:
		@@ -847,9 +810,9 @@ class ValuesListQuerySet(ValuesQuerySet):
		for row in self.query.results_iter():
		yield tuple(row)
		else:
		# When extra(select=...) or an annotation is involved, the extra cols are
		# always at the start of the row, and we need to reorder the fields
		# to match the order in self._fields.
		# When extra(select=...) or an annotation is involved, the extra
		# cols are always at the start of the row, and we need to reorder
		# the fields to match the order in self._fields.
		extra_names = self.query.extra_select.keys()
		field_names = self.field_names
		aggregate_names = self.query.aggregate_select.keys()
		@@ -884,6 +847,7 @@ class DateQuerySet(QuerySet):
		Called by the _clone() method after initializing the rest of the
		instance.
		"""
		self.query.clear_deferred_loading()
		self.query = self.query.clone(klass=sql.DateQuery, setup=True)
		self.query.select = []
		field = self.model._meta.get_field(self._field_name, many_to_many=False)
		@@ -935,7 +899,7 @@ class EmptyQuerySet(QuerySet):


		def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0,
		requested=None, offset=0):
		requested=None, offset=0, only_load=None):
		"""
		Helper function that recursively returns an object with the specified
		related attributes already populated.
		@@ -950,6 +914,23 @@ def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0,
		if not [x for x in fields if x is not None]:
		# If we only have a list of Nones, there was not related object.
		obj = None
		else:
		load_fields = only_load and only_load.get(klass) or None
		if load_fields:
		# Handle deferred fields.
		skip = set()
		init_list = []
		pk_val = fields[klass._meta.pk_index()]
		for field in klass._meta.fields:
		if field.name not in load_fields:
		skip.add(field.name)
		else:
		init_list.append(field.attname)
		if skip:
		klass = deferred_class_factory(klass, pk_val, skip)
		obj = klass(**dict(zip(init_list, fields)))
		else:
		obj = klass(*fields)
		else:
		obj = klass(*fields)
		index_end += offset

django/db/models/query_utils.py

+169 −2

Original line number	Diff line number	Diff line
		"""
		Various data structures used in query construction.

		Factored out from django.db.models.query so that they can also be used by other
		modules without getting into circular import difficulties.
		Factored out from django.db.models.query to avoid making the main module very
		large and/or so that they can be used by other modules without getting into
		circular import difficulties.
		"""

		import weakref
		from copy import deepcopy

		from django.utils import tree
		from django.utils.datastructures import SortedDict

		try:
		sorted
		except NameError:
		from django.utils.itercompat import sorted # For Python 2.3.


		class CyclicDependency(Exception):
		    """
		    Error raised when a collection of objects being processed has a cyclic
		    dependency, e.g. when deleting multiple objects.
		    """

		class CollectedObjects(object):
		    """
		    A container that stores keys and lists of values along with remembering the
		    parent objects for all the keys.

		    This is used for the database object deletion routines so that we can
		    calculate the 'leaf' objects which should be deleted first.
		    """

		    def __init__(self):
		        # Maps model -> SortedDict of pk -> object (filled in by add()).
		        self.data = {}
		        # Maps parent model -> list of models that were reached through it.
		        self.children = {}

		    def add(self, model, pk, obj, parent_model, nullable=False):
		        """
		        Adds an item to the container.

		        Arguments:
		        * model - the class of the object being added.
		        * pk - the primary key.
		        * obj - the object itself.
		        * parent_model - the model of the parent object that this object was
		          reached through.
		        * nullable - should be True if this relation is nullable.

		        Returns True if the item already existed in the structure and
		        False otherwise.
		        """
		        d = self.data.setdefault(model, SortedDict())
		        retval = pk in d
		        d[pk] = obj
		        # Nullable relationships can be ignored -- they are nulled out before
		        # deleting, and therefore do not affect the order in which objects
		        # have to be deleted.
		        if parent_model is not None and not nullable:
		            self.children.setdefault(parent_model, []).append(model)
		        return retval

		    def __contains__(self, key):
		        return self.data.__contains__(key)

		    def __getitem__(self, key):
		        return self.data[key]

		    def __nonzero__(self):
		        return bool(self.data)

		    # Python 3 looks up __bool__ instead of __nonzero__; without this alias
		    # an empty container would always be truthy there.
		    __bool__ = __nonzero__

		    def iteritems(self):
		        for k in self.ordered_keys():
		            yield k, self[k]

		    def items(self):
		        return list(self.iteritems())

		    def keys(self):
		        return self.ordered_keys()

		    def ordered_keys(self):
		        """
		        Returns the models in the order that they should be dealt with (i.e.
		        models with no dependencies first).

		        Raises CyclicDependency if a full pass over the remaining models
		        finds none whose children have all been dealt with.
		        """
		        dealt_with = SortedDict()
		        # Start with items that have no children
		        models = self.data.keys()
		        while len(dealt_with) < len(models):
		            found = False
		            for model in models:
		                if model in dealt_with:
		                    continue
		                children = self.children.setdefault(model, [])
		                if len([c for c in children if c not in dealt_with]) == 0:
		                    dealt_with[model] = None
		                    found = True
		            if not found:
		                raise CyclicDependency(
		                    "There is a cyclic dependency of items to be processed.")

		        return dealt_with.keys()

		    def unordered_keys(self):
		        """
		        Fallback for the case where there is a cyclic dependency but we don't
		        care.
		        """
		        return self.data.keys()

		class QueryWrapper(object):
		"""
		@@ -51,6 +153,39 @@ class Q(tree.Node):
		obj.negate()
		return obj

		class DeferredAttribute(object):
		    """
		    Descriptor standing in for a deferred-loading field.

		    The first time the value is read, a query is run for the single column
		    ``field_name`` of the row whose primary key is ``pk_value``; the result
		    is remembered on this descriptor object and returned on later reads.
		    """
		    def __init__(self, field_name, pk_value, model):
		        self.field_name = field_name
		        self.pk_value = pk_value
		        # Only a weak reference to the model is held.
		        self.model_ref = weakref.ref(model)
		        self.loaded = False

		    def __get__(self, instance, owner):
		        """
		        Return the field value, fetching it from the datastore on the first
		        lookup and caching it on this descriptor.

		        Returns None, without querying, if the weakly referenced model no
		        longer exists.
		        """
		        assert instance is not None
		        if self.loaded:
		            return self.value
		        model = self.model_ref()
		        if model is None:
		            return
		        matching = model._base_manager.filter(pk=self.pk_value)
		        rows = list(matching.values_list(self.field_name, flat=True))
		        self.value = rows[0]
		        self.loaded = True
		        return self.value

		    def __set__(self, name, value):
		        """
		        Deferred loading attributes can be set normally (which means there
		        will never be a database lookup involved).

		        Note that, per the descriptor protocol, the "name" argument receives
		        the instance being assigned to; it is not used here.
		        """
		        self.value = value
		        self.loaded = True

		def select_related_descend(field, restricted, requested):
		"""
		Returns True if this field should be used to descend deeper for
		@@ -67,3 +202,35 @@ def select_related_descend(field, restricted, requested):
		if not restricted and field.null:
		return False
		return True

		# This function is needed because data descriptors must be defined on a class
		# object, not an instance, to have any effect.

		def deferred_class_factory(model, pk_value, attrs):
		    """
		    Build and return a subclass of "model" in which every name in "attrs"
		    is replaced by a DeferredAttribute object. "pk_value" is handed to each
		    DeferredAttribute and ties the deferred attributes to one particular
		    instance of the model.
		    """
		    class Meta:
		        proxy = True
		        app_label = model._meta.app_label

		    # The app_cache wants a unique name for each model, otherwise the new class
		    # won't be created (we get an old one back). Therefore, the name is
		    # generated from the passed in attrs. It's OK to reuse an old class if the
		    # attrs are identical.
		    attr_names = list(attrs)
		    attr_names.sort()
		    name = "%s_Deferred_%s" % (model.__name__, '_'.join(attr_names))

		    # Class namespace for the new type: one descriptor per deferred attr,
		    # plus the bookkeeping entries.
		    overrides = {}
		    for attr in attrs:
		        overrides[attr] = DeferredAttribute(attr, pk_value, model)
		    overrides["Meta"] = Meta
		    overrides["__module__"] = model.__module__
		    overrides["_deferred"] = True
		    return type(name, (model,), overrides)

		# The above function is also used to unpickle model instances with deferred
		# fields.
		deferred_class_factory.__safe_for_unpickling__ = True