Fixed #17003 - prefetch_related should support foreign keys/one-to-one (052a011e) · Commits · Dom Sekotill / django

django/contrib/contenttypes/generic.py

+51 −1

Original line number	Diff line number	Diff line
		@@ -2,7 +2,10 @@
		Classes allowing "generic" relations through ContentType and object-id fields.
		"""

		from collections import defaultdict
		from functools import partial
		from operator import attrgetter

		from django.core.exceptions import ObjectDoesNotExist
		from django.db import connection
		from django.db.models import signals
		@@ -59,6 +62,49 @@ class GenericForeignKey(object):
		# This should never happen. I love comments like this, don't you?
		raise Exception("Impossible arguments to GFK.get_content_type!")

		def get_prefetch_query_set(self, instances):
		# For efficiency, group the instances by content type and then do one
		# query per model
		fk_dict = defaultdict(list)
		# We need one instance for each group in order to get the right db:
		instance_dict = {}
		ct_attname = self.model._meta.get_field(self.ct_field).get_attname()
		for instance in instances:
		# We avoid looking for values if either ct_id or fkey value is None
		ct_id = getattr(instance, ct_attname)
		if ct_id is not None:
		fk_val = getattr(instance, self.fk_field)
		if fk_val is not None:
		fk_dict[ct_id].append(fk_val)
		instance_dict[ct_id] = instance

		ret_val = []
		for ct_id, fkeys in fk_dict.items():
		instance = instance_dict[ct_id]
		ct = self.get_content_type(id=ct_id, using=instance._state.db)
		ret_val.extend(ct.get_all_objects_for_this_type(pk__in=fkeys))

		# For doing the join in Python, we have to match both the FK val and the
		# content type, so the 'attr' vals we return need to be callables that
		# will return a (fk, class) pair.
		def gfk_key(obj):
		ct_id = getattr(obj, ct_attname)
		if ct_id is None:
		return None
		else:
		return (getattr(obj, self.fk_field),
		self.get_content_type(id=ct_id,
		using=obj._state.db).model_class())

		return (ret_val,
		lambda obj: (obj._get_pk_val(), obj.__class__),
		gfk_key,
		True,
		self.cache_attr)

		def is_cached(self, instance):
		return hasattr(instance, self.cache_attr)

		def __get__(self, instance, instance_type=None):
		if instance is None:
		return self
		@@ -282,7 +328,11 @@ def create_generic_related_manager(superclass):
		[obj._get_pk_val() for obj in instances]
		}
		qs = super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**query)
		return (qs, self.object_id_field_name, 'pk')
		return (qs,
		attrgetter(self.object_id_field_name),
		lambda obj: obj._get_pk_val(),
		False,
		self.prefetch_cache_name)

		def add(self, *objs):
		for obj in objs:

django/contrib/contenttypes/models.py

+6 −0

Original line number	Diff line number	Diff line
		@@ -113,5 +113,11 @@ class ContentType(models.Model):
		"""
		return self.model_class()._base_manager.using(self._state.db).get(**kwargs)

		def get_all_objects_for_this_type(self, **kwargs):
		"""
		Returns all objects of this type for the keyword arguments given.
		"""
		return self.model_class()._base_manager.using(self._state.db).filter(**kwargs)

		def natural_key(self):
		return (self.app_label, self.model)

django/db/models/fields/related.py

+60 −26

Original line number	Diff line number	Diff line
		from operator import attrgetter

		from django.db import connection, router
		from django.db.backends import util
		from django.db.models import signals, get_model
		@@ -227,6 +229,22 @@ class SingleRelatedObjectDescriptor(object):
		self.related = related
		self.cache_name = related.get_cache_name()

		def is_cached(self, instance):
		return hasattr(instance, self.cache_name)

		def get_query_set(self, **db_hints):
		db = router.db_for_read(self.related.model, **db_hints)
		return self.related.model._base_manager.using(db)

		def get_prefetch_query_set(self, instances):
		vals = [instance._get_pk_val() for instance in instances]
		params = {'%s__pk__in' % self.related.field.name: vals}
		return (self.get_query_set(),
		attrgetter(self.related.field.attname),
		lambda obj: obj._get_pk_val(),
		True,
		self.cache_name)

		def __get__(self, instance, instance_type=None):
		if instance is None:
		return self
		@@ -234,8 +252,7 @@ class SingleRelatedObjectDescriptor(object):
		return getattr(instance, self.cache_name)
		except AttributeError:
		params = {'%s__pk' % self.related.field.name: instance._get_pk_val()}
		db = router.db_for_read(self.related.model, instance=instance)
		rel_obj = self.related.model._base_manager.using(db).get(**params)
		rel_obj = self.get_query_set(instance=instance).get(**params)
		setattr(instance, self.cache_name, rel_obj)
		return rel_obj

		@@ -283,14 +300,40 @@ class ReverseSingleRelatedObjectDescriptor(object):
		# ReverseSingleRelatedObjectDescriptor instance.
		def __init__(self, field_with_rel):
		self.field = field_with_rel
		self.cache_name = self.field.get_cache_name()

		def is_cached(self, instance):
		return hasattr(instance, self.cache_name)

		def get_query_set(self, **db_hints):
		db = router.db_for_read(self.field.rel.to, **db_hints)
		rel_mgr = self.field.rel.to._default_manager
		# If the related manager indicates that it should be used for
		# related fields, respect that.
		if getattr(rel_mgr, 'use_for_related_fields', False):
		return rel_mgr.using(db)
		else:
		return QuerySet(self.field.rel.to).using(db)

		def get_prefetch_query_set(self, instances):
		vals = [getattr(instance, self.field.attname) for instance in instances]
		other_field = self.field.rel.get_related_field()
		if other_field.rel:
		params = {'%s__pk__in' % self.field.rel.field_name: vals}
		else:
		params = {'%s__in' % self.field.rel.field_name: vals}
		return (self.get_query_set().filter(**params),
		attrgetter(self.field.rel.field_name),
		attrgetter(self.field.attname),
		True,
		self.cache_name)

		def __get__(self, instance, instance_type=None):
		if instance is None:
		return self

		cache_name = self.field.get_cache_name()
		try:
		return getattr(instance, cache_name)
		return getattr(instance, self.cache_name)
		except AttributeError:
		val = getattr(instance, self.field.attname)
		if val is None:
		@@ -303,16 +346,9 @@ class ReverseSingleRelatedObjectDescriptor(object):
		params = {'%s__pk' % self.field.rel.field_name: val}
		else:
		params = {'%s__exact' % self.field.rel.field_name: val}

		# If the related manager indicates that it should be used for
		# related fields, respect that.
		rel_mgr = self.field.rel.to._default_manager
		db = router.db_for_read(self.field.rel.to, instance=instance)
		if getattr(rel_mgr, 'use_for_related_fields', False):
		rel_obj = rel_mgr.using(db).get(**params)
		else:
		rel_obj = QuerySet(self.field.rel.to).using(db).get(**params)
		setattr(instance, cache_name, rel_obj)
		qs = self.get_query_set(instance=instance)
		rel_obj = qs.get(**params)
		setattr(instance, self.cache_name, rel_obj)
		return rel_obj

		def __set__(self, instance, value):
		@@ -425,15 +461,15 @@ class ForeignRelatedObjectsDescriptor(object):
		return super(RelatedManager, self).get_query_set().using(db).filter(**self.core_filters)

		def get_prefetch_query_set(self, instances):
		"""
		Return a queryset that does the bulk lookup needed
		by prefetch_related functionality.
		"""
		db = self._db or router.db_for_read(self.model)
		query = {'%s__%s__in' % (rel_field.name, attname):
		[getattr(obj, attname) for obj in instances]}
		qs = super(RelatedManager, self).get_query_set().using(db).filter(**query)
		return (qs, rel_field.get_attname(), attname)
		return (qs,
		attrgetter(rel_field.get_attname()),
		attrgetter(attname),
		False,
		rel_field.related_query_name())

		def add(self, *objs):
		for obj in objs:
		@@ -507,12 +543,6 @@ def create_many_related_manager(superclass, rel):
		return super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**self.core_filters)

		def get_prefetch_query_set(self, instances):
		"""
		Returns a tuple:
		(queryset of instances of self.model that are related to passed in instances
		attr of returned instances needed for matching
		attr of passed in instances needed for matching)
		"""
		from django.db import connections
		db = self._db or router.db_for_read(self.model)
		query = {'%s__pk__in' % self.query_field_name:
		@@ -534,7 +564,11 @@ def create_many_related_manager(superclass, rel):
		qs = qs.extra(select={'_prefetch_related_val':
		'%s.%s' % (qn(join_table), qn(source_col))})
		select_attname = fk.rel.get_related_field().get_attname()
		return (qs, '_prefetch_related_val', select_attname)
		return (qs,
		attrgetter('_prefetch_related_val'),
		attrgetter(select_attname),
		False,
		self.prefetch_cache_name)

		# If the ManyToMany relation has an intermediary model,
		# the add and remove methods do not exist.

django/db/models/query.py

+97 −28

Original line number	Diff line number	Diff line
		@@ -1612,36 +1612,42 @@ def prefetch_related_objects(result_cache, related_lookups):
		break

		# Descend down tree
		try:
		rel_obj = getattr(obj_list[0], attr)
		except AttributeError:

		# We assume that objects retrieved are homogenous (which is the premise
		# of prefetch_related), so what applies to first object applies to all.
		first_obj = obj_list[0]
		prefetcher, attr_found, is_fetched = get_prefetcher(first_obj, attr)

		if not attr_found:
		raise AttributeError("Cannot find '%s' on %s object, '%s' is an invalid "
		"parameter to prefetch_related()" %
		(attr, obj_list[0].__class__.__name__, lookup))
		(attr, first_obj.__class__.__name__, lookup))

		can_prefetch = hasattr(rel_obj, 'get_prefetch_query_set')
		if level == len(attrs) - 1 and not can_prefetch:
		# Last one, this must resolve to a related manager.
		raise ValueError("'%s' does not resolve to a supported 'many related"
		" manager' for model %s - this is an invalid"
		" parameter to prefetch_related()."
		% (lookup, model.__name__))
		if level == len(attrs) - 1 and prefetcher is None:
		# Last one, this must resolve to something that supports
		# prefetching, otherwise there is no point adding it and the
		# developer asking for it has made a mistake.
		raise ValueError("'%s' does not resolve to a item that supports "
		"prefetching - this is an invalid parameter to "
		"prefetch_related()." % lookup)

		if can_prefetch:
		if prefetcher is not None and not is_fetched:
		# Check we didn't do this already
		current_lookup = LOOKUP_SEP.join(attrs[0:level+1])
		if current_lookup in done_queries:
		obj_list = done_queries[current_lookup]
		else:
		relmanager = rel_obj
		obj_list, additional_prl = prefetch_one_level(obj_list, relmanager, attr)
		obj_list, additional_prl = prefetch_one_level(obj_list, prefetcher, attr)
		for f in additional_prl:
		new_prl = LOOKUP_SEP.join([current_lookup, f])
		related_lookups.append(new_prl)
		done_queries[current_lookup] = obj_list
		else:
		# Assume we've got some singly related object. We replace
		# the current list of parent objects with that list.
		# Either a singly related object that has already been fetched
		# (e.g. via select_related), or hopefully some other property
		# that doesn't support prefetching but needs to be traversed.

		# We replace the current list of parent objects with that list.
		obj_list = [getattr(obj, attr) for obj in obj_list]

		# Filter out 'None' so that we can continue with nullable
		@@ -1649,18 +1655,73 @@ def prefetch_related_objects(result_cache, related_lookups):
		obj_list = [obj for obj in obj_list if obj is not None]


		def prefetch_one_level(instances, relmanager, attname):
		def get_prefetcher(instance, attr):
		"""
		For the attribute 'attr' on the given instance, finds
		an object that has a get_prefetch_query_set().
		Return a 3 tuple containing:
		(the object with get_prefetch_query_set (or None),
		a boolean that is False if the attribute was not found at all,
		a boolean that is True if the attribute has already been fetched)
		"""
		prefetcher = None
		attr_found = False
		is_fetched = False

		# For singly related objects, we have to avoid getting the attribute
		# from the object, as this will trigger the query. So we first try
		# on the class, in order to get the descriptor object.
		rel_obj_descriptor = getattr(instance.__class__, attr, None)
		if rel_obj_descriptor is None:
		try:
		rel_obj = getattr(instance, attr)
		attr_found = True
		except AttributeError:
		pass
		else:
		attr_found = True
		if rel_obj_descriptor:
		# singly related object, descriptor object has the
		# get_prefetch_query_set() method.
		if hasattr(rel_obj_descriptor, 'get_prefetch_query_set'):
		prefetcher = rel_obj_descriptor
		if rel_obj_descriptor.is_cached(instance):
		is_fetched = True
		else:
		# descriptor doesn't support prefetching, so we go ahead and get
		# the attribute on the instance rather than the class to
		# support many related managers
		rel_obj = getattr(instance, attr)
		if hasattr(rel_obj, 'get_prefetch_query_set'):
		prefetcher = rel_obj
		return prefetcher, attr_found, is_fetched


		def prefetch_one_level(instances, prefetcher, attname):
		"""
		Helper function for prefetch_related_objects

		Runs prefetches on all instances using the manager relmanager,
		assigning results to queryset against instance.attname.
		Runs prefetches on all instances using the prefetcher object,
		assigning results to relevant caches in instance.

		The prefetched objects are returned, along with any additional
		prefetches that must be done due to prefetch_related lookups
		found from default managers.
		"""
		rel_qs, rel_obj_attr, instance_attr = relmanager.get_prefetch_query_set(instances)
		# prefetcher must have a method get_prefetch_query_set() which takes a list
		# of instances, and returns a tuple:

		# (queryset of instances of self.model that are related to passed in instances,
		# callable that gets value to be matched for returned instances,
		# callable that gets value to be matched for passed in instances,
		# boolean that is True for singly related objects,
		# cache name to assign to).

		# The 'values to be matched' must be hashable as they will be used
		# in a dictionary.

		rel_qs, rel_obj_attr, instance_attr, single, cache_name =\
		prefetcher.get_prefetch_query_set(instances)
		# We have to handle the possibility that the default manager itself added
		# prefetch_related lookups to the QuerySet we just got back. We don't want to
		# trigger the prefetch_related functionality by evaluating the query.
		@@ -1676,17 +1737,25 @@ def prefetch_one_level(instances, relmanager, attname):

		rel_obj_cache = {}
		for rel_obj in all_related_objects:
		rel_attr_val = getattr(rel_obj, rel_obj_attr)
		rel_attr_val = rel_obj_attr(rel_obj)
		if rel_attr_val not in rel_obj_cache:
		rel_obj_cache[rel_attr_val] = []
		rel_obj_cache[rel_attr_val].append(rel_obj)

		for obj in instances:
		instance_attr_val = instance_attr(obj)
		vals = rel_obj_cache.get(instance_attr_val, [])
		if single:
		# Need to assign to single cache on instance
		if vals:
		setattr(obj, cache_name, vals[0])
		else:
		# Multi, attribute represents a manager with an .all() method that
		# returns a QuerySet
		qs = getattr(obj, attname).all()
		instance_attr_val = getattr(obj, instance_attr)
		qs._result_cache = rel_obj_cache.get(instance_attr_val, [])
		qs._result_cache = vals
		# We don't want the individual qs doing prefetch_related now, since we
		# have merged this into the current work.
		qs._prefetch_done = True
		obj._prefetched_objects_cache[attname] = qs
		obj._prefetched_objects_cache[cache_name] = qs
		return all_related_objects, additional_prl

docs/ref/models/querysets.txt

+57 −32

Original line number	Diff line number	Diff line
		@@ -696,14 +696,26 @@ prefetch_related
		.. versionadded:: 1.4

		Returns a ``QuerySet`` that will automatically retrieve, in a single batch,
		related many-to-many and many-to-one objects for each of the specified lookups.

		This is similar to ``select_related`` for the 'many related objects' case, but
		note that ``prefetch_related`` causes a separate query to be issued for each set
		of related objects that you request, unlike ``select_related`` which modifies
		the original query with joins in order to get the related objects. With
		``prefetch_related``, the additional queries are done as soon as the QuerySet
		begins to be evaluated.
		related objects for each of the specified lookups.

		This has a similar purpose to ``select_related``, in that both are designed to
		stop the deluge of database queries that is caused by accessing related objects,
		but the strategy is quite different.

		``select_related`` works by creating a SQL join and including the fields of the
		related object in the SELECT statement. For this reason, ``select_related`` gets
		the related objects in the same database query. However, to avoid the much
		larger result set that would result from joining across a 'many' relationship,
		``select_related`` is limited to single-valued relationships - foreign key and
		one-to-one.

		``prefetch_related``, on the other hand, does a separate lookup for each
		relationship, and does the 'joining' in Python. This allows it to prefetch
		many-to-many and many-to-one objects, which cannot be done using
		``select_related``, in addition to the foreign key and one-to-one relationships
		that are supported by ``select_related``. It also supports prefetching of
		:class:`~django.contrib.contenttypes.generic.GenericRelation` and
		:class:`~django.contrib.contenttypes.generic.GenericForeignKey`.

		For example, suppose you have these models::

		@@ -733,14 +745,17 @@ All the relevant toppings will be fetched in a single query, and used to make
		``QuerySets`` that have a pre-filled cache of the relevant results. These
		``QuerySets`` are then used in the ``self.toppings.all()`` calls.

		Please note that use of ``prefetch_related`` will mean that the additional
		queries run will always be executed - even if you never use the related
		objects - and it always fully populates the result cache on the primary
		``QuerySet`` (which can sometimes be avoided in other cases).
		The additional queries are executed after the QuerySet has begun to be evaluated
		and the primary query has been executed. Note that the result cache of the
		primary QuerySet and all specified related objects will then be fully loaded
		into memory, which is often avoided in other cases - even after a query has been
		executed in the database, QuerySet normally tries to make uses of chunking
		between the database to avoid loading all objects into memory before you need
		them.

		Also remember that, as always with QuerySets, any subsequent chained methods
		will ignore previously cached results, and retrieve data using a fresh database
		query. So, if you write the following:
		which imply a different database query will ignore previously cached results,
		and retrieve data using a fresh database query. So, if you write the following:

		>>> pizzas = Pizza.objects.prefetch_related('toppings')
		>>> [list(pizza.toppings.filter(spicy=True)) for pizza in pizzas]
		@@ -749,12 +764,6 @@ query. So, if you write the following:
		you - in fact it hurts performance, since you have done a database query that
		you haven't used. So use this feature with caution!

		The lookups that must be supplied to this method can be any attributes on the
		model instances which represent related queries that return multiple
		objects. This includes attributes representing the 'many' side of ``ForeignKey``
		relationships, forward and reverse ``ManyToManyField`` attributes, and also any
		``GenericRelations``.

		You can also use the normal join syntax to do related fields of related
		fields. Suppose we have an additional model to the example above::

		@@ -770,24 +779,40 @@ This will prefetch all pizzas belonging to restaurants, and all toppings
		belonging to those pizzas. This will result in a total of 3 database queries -
		one for the restaurants, one for the pizzas, and one for the toppings.

		>>> Restaurant.objects.select_related('best_pizza').prefetch_related('best_pizza__toppings')
		>>> Restaurant.objects.prefetch_related('best_pizza__toppings')

		This will fetch the best pizza and all the toppings for the best pizza for each
		restaurant. This will be done in 2 database queries - one for the restaurants
		and 'best pizzas' combined (achieved through use of ``select_related``), and one
		for the toppings.
		restaurant. This will be done in 3 database queries - one for the restaurants,
		one for the 'best pizzas', and one for one for the toppings.

		Of course, the ``best_pizza`` relationship could also be fetched using
		``select_related`` to reduce the query count to 2:

		>>> Restaurant.objects.select_related('best_pizza').prefetch_related('best_pizza__toppings')

		Since the prefetch is executed after the main query (which includes the joins
		needed by ``select_related``), it is able to detect that the ``best_pizza``
		objects have already been fetched, and it will skip fetching them again.

		Chaining ``prefetch_related`` calls will accumulate the fields that should have
		this behavior applied. To clear any ``prefetch_related`` behavior, pass `None`
		as a parameter::
		Chaining ``prefetch_related`` calls will accumulate the lookups that are
		prefetched. To clear any ``prefetch_related`` behavior, pass `None` as a
		parameter::

		>>> non_prefetched = qs.prefetch_related(None)

		One difference when using ``prefetch_related`` is that, in some circumstances,
		objects created by a query can be shared between the different objects that they
		are related to i.e. a single Python model instance can appear at more than one
		point in the tree of objects that are returned. Normally this behavior will not
		be a problem, and will in fact save both memory and CPU time.
		One difference to note when using ``prefetch_related`` is that objects created
		by a query can be shared between the different objects that they are related to
		i.e. a single Python model instance can appear at more than one point in the
		tree of objects that are returned. This will normally happen with foreign key
		relationships. Typically this behavior will not be a problem, and will in fact
		save both memory and CPU time.

		While ``prefetch_related`` supports prefetching ``GenericForeignKey``
		relationships, the number of queries will depend on the data. Since a
		``GenericForeignKey`` can reference data in multiple tables, one query per table
		referenced is needed, rather than one query for all the items. There could be
		additional queries on the ``ContentType`` table if the relevant rows have not
		already been fetched.

		extra
		~~~~~