Loading django/contrib/contenttypes/generic.py +51 −1 Original line number Diff line number Diff line Loading @@ -2,7 +2,10 @@ Classes allowing "generic" relations through ContentType and object-id fields. """ from collections import defaultdict from functools import partial from operator import attrgetter from django.core.exceptions import ObjectDoesNotExist from django.db import connection from django.db.models import signals Loading Loading @@ -59,6 +62,49 @@ class GenericForeignKey(object): # This should never happen. I love comments like this, don't you? raise Exception("Impossible arguments to GFK.get_content_type!") def get_prefetch_query_set(self, instances): # For efficiency, group the instances by content type and then do one # query per model fk_dict = defaultdict(list) # We need one instance for each group in order to get the right db: instance_dict = {} ct_attname = self.model._meta.get_field(self.ct_field).get_attname() for instance in instances: # We avoid looking for values if either ct_id or fkey value is None ct_id = getattr(instance, ct_attname) if ct_id is not None: fk_val = getattr(instance, self.fk_field) if fk_val is not None: fk_dict[ct_id].append(fk_val) instance_dict[ct_id] = instance ret_val = [] for ct_id, fkeys in fk_dict.items(): instance = instance_dict[ct_id] ct = self.get_content_type(id=ct_id, using=instance._state.db) ret_val.extend(ct.get_all_objects_for_this_type(pk__in=fkeys)) # For doing the join in Python, we have to match both the FK val and the # content type, so the 'attr' vals we return need to be callables that # will return a (fk, class) pair. 
def gfk_key(obj): ct_id = getattr(obj, ct_attname) if ct_id is None: return None else: return (getattr(obj, self.fk_field), self.get_content_type(id=ct_id, using=obj._state.db).model_class()) return (ret_val, lambda obj: (obj._get_pk_val(), obj.__class__), gfk_key, True, self.cache_attr) def is_cached(self, instance): return hasattr(instance, self.cache_attr) def __get__(self, instance, instance_type=None): if instance is None: return self Loading Loading @@ -282,7 +328,11 @@ def create_generic_related_manager(superclass): [obj._get_pk_val() for obj in instances] } qs = super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**query) return (qs, self.object_id_field_name, 'pk') return (qs, attrgetter(self.object_id_field_name), lambda obj: obj._get_pk_val(), False, self.prefetch_cache_name) def add(self, *objs): for obj in objs: Loading django/contrib/contenttypes/models.py +6 −0 Original line number Diff line number Diff line Loading @@ -113,5 +113,11 @@ class ContentType(models.Model): """ return self.model_class()._base_manager.using(self._state.db).get(**kwargs) def get_all_objects_for_this_type(self, **kwargs): """ Returns all objects of this type for the keyword arguments given. 
""" return self.model_class()._base_manager.using(self._state.db).filter(**kwargs) def natural_key(self): return (self.app_label, self.model) django/db/models/fields/related.py +60 −26 Original line number Diff line number Diff line from operator import attrgetter from django.db import connection, router from django.db.backends import util from django.db.models import signals, get_model Loading Loading @@ -227,6 +229,22 @@ class SingleRelatedObjectDescriptor(object): self.related = related self.cache_name = related.get_cache_name() def is_cached(self, instance): return hasattr(instance, self.cache_name) def get_query_set(self, **db_hints): db = router.db_for_read(self.related.model, **db_hints) return self.related.model._base_manager.using(db) def get_prefetch_query_set(self, instances): vals = [instance._get_pk_val() for instance in instances] params = {'%s__pk__in' % self.related.field.name: vals} return (self.get_query_set(), attrgetter(self.related.field.attname), lambda obj: obj._get_pk_val(), True, self.cache_name) def __get__(self, instance, instance_type=None): if instance is None: return self Loading @@ -234,8 +252,7 @@ class SingleRelatedObjectDescriptor(object): return getattr(instance, self.cache_name) except AttributeError: params = {'%s__pk' % self.related.field.name: instance._get_pk_val()} db = router.db_for_read(self.related.model, instance=instance) rel_obj = self.related.model._base_manager.using(db).get(**params) rel_obj = self.get_query_set(instance=instance).get(**params) setattr(instance, self.cache_name, rel_obj) return rel_obj Loading Loading @@ -283,14 +300,40 @@ class ReverseSingleRelatedObjectDescriptor(object): # ReverseSingleRelatedObjectDescriptor instance. 
def __init__(self, field_with_rel): self.field = field_with_rel self.cache_name = self.field.get_cache_name() def is_cached(self, instance): return hasattr(instance, self.cache_name) def get_query_set(self, **db_hints): db = router.db_for_read(self.field.rel.to, **db_hints) rel_mgr = self.field.rel.to._default_manager # If the related manager indicates that it should be used for # related fields, respect that. if getattr(rel_mgr, 'use_for_related_fields', False): return rel_mgr.using(db) else: return QuerySet(self.field.rel.to).using(db) def get_prefetch_query_set(self, instances): vals = [getattr(instance, self.field.attname) for instance in instances] other_field = self.field.rel.get_related_field() if other_field.rel: params = {'%s__pk__in' % self.field.rel.field_name: vals} else: params = {'%s__in' % self.field.rel.field_name: vals} return (self.get_query_set().filter(**params), attrgetter(self.field.rel.field_name), attrgetter(self.field.attname), True, self.cache_name) def __get__(self, instance, instance_type=None): if instance is None: return self cache_name = self.field.get_cache_name() try: return getattr(instance, cache_name) return getattr(instance, self.cache_name) except AttributeError: val = getattr(instance, self.field.attname) if val is None: Loading @@ -303,16 +346,9 @@ class ReverseSingleRelatedObjectDescriptor(object): params = {'%s__pk' % self.field.rel.field_name: val} else: params = {'%s__exact' % self.field.rel.field_name: val} # If the related manager indicates that it should be used for # related fields, respect that. 
rel_mgr = self.field.rel.to._default_manager db = router.db_for_read(self.field.rel.to, instance=instance) if getattr(rel_mgr, 'use_for_related_fields', False): rel_obj = rel_mgr.using(db).get(**params) else: rel_obj = QuerySet(self.field.rel.to).using(db).get(**params) setattr(instance, cache_name, rel_obj) qs = self.get_query_set(instance=instance) rel_obj = qs.get(**params) setattr(instance, self.cache_name, rel_obj) return rel_obj def __set__(self, instance, value): Loading Loading @@ -425,15 +461,15 @@ class ForeignRelatedObjectsDescriptor(object): return super(RelatedManager, self).get_query_set().using(db).filter(**self.core_filters) def get_prefetch_query_set(self, instances): """ Return a queryset that does the bulk lookup needed by prefetch_related functionality. """ db = self._db or router.db_for_read(self.model) query = {'%s__%s__in' % (rel_field.name, attname): [getattr(obj, attname) for obj in instances]} qs = super(RelatedManager, self).get_query_set().using(db).filter(**query) return (qs, rel_field.get_attname(), attname) return (qs, attrgetter(rel_field.get_attname()), attrgetter(attname), False, rel_field.related_query_name()) def add(self, *objs): for obj in objs: Loading Loading @@ -507,12 +543,6 @@ def create_many_related_manager(superclass, rel): return super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**self.core_filters) def get_prefetch_query_set(self, instances): """ Returns a tuple: (queryset of instances of self.model that are related to passed in instances attr of returned instances needed for matching attr of passed in instances needed for matching) """ from django.db import connections db = self._db or router.db_for_read(self.model) query = {'%s__pk__in' % self.query_field_name: Loading @@ -534,7 +564,11 @@ def create_many_related_manager(superclass, rel): qs = qs.extra(select={'_prefetch_related_val': '%s.%s' % (qn(join_table), qn(source_col))}) select_attname = 
fk.rel.get_related_field().get_attname() return (qs, '_prefetch_related_val', select_attname) return (qs, attrgetter('_prefetch_related_val'), attrgetter(select_attname), False, self.prefetch_cache_name) # If the ManyToMany relation has an intermediary model, # the add and remove methods do not exist. Loading django/db/models/query.py +97 −28 Original line number Diff line number Diff line Loading @@ -1612,36 +1612,42 @@ def prefetch_related_objects(result_cache, related_lookups): break # Descend down tree try: rel_obj = getattr(obj_list[0], attr) except AttributeError: # We assume that objects retrieved are homogenous (which is the premise # of prefetch_related), so what applies to first object applies to all. first_obj = obj_list[0] prefetcher, attr_found, is_fetched = get_prefetcher(first_obj, attr) if not attr_found: raise AttributeError("Cannot find '%s' on %s object, '%s' is an invalid " "parameter to prefetch_related()" % (attr, obj_list[0].__class__.__name__, lookup)) (attr, first_obj.__class__.__name__, lookup)) can_prefetch = hasattr(rel_obj, 'get_prefetch_query_set') if level == len(attrs) - 1 and not can_prefetch: # Last one, this *must* resolve to a related manager. raise ValueError("'%s' does not resolve to a supported 'many related" " manager' for model %s - this is an invalid" " parameter to prefetch_related()." % (lookup, model.__name__)) if level == len(attrs) - 1 and prefetcher is None: # Last one, this *must* resolve to something that supports # prefetching, otherwise there is no point adding it and the # developer asking for it has made a mistake. raise ValueError("'%s' does not resolve to a item that supports " "prefetching - this is an invalid parameter to " "prefetch_related()." 
% lookup) if can_prefetch: if prefetcher is not None and not is_fetched: # Check we didn't do this already current_lookup = LOOKUP_SEP.join(attrs[0:level+1]) if current_lookup in done_queries: obj_list = done_queries[current_lookup] else: relmanager = rel_obj obj_list, additional_prl = prefetch_one_level(obj_list, relmanager, attr) obj_list, additional_prl = prefetch_one_level(obj_list, prefetcher, attr) for f in additional_prl: new_prl = LOOKUP_SEP.join([current_lookup, f]) related_lookups.append(new_prl) done_queries[current_lookup] = obj_list else: # Assume we've got some singly related object. We replace # the current list of parent objects with that list. # Either a singly related object that has already been fetched # (e.g. via select_related), or hopefully some other property # that doesn't support prefetching but needs to be traversed. # We replace the current list of parent objects with that list. obj_list = [getattr(obj, attr) for obj in obj_list] # Filter out 'None' so that we can continue with nullable Loading @@ -1649,18 +1655,73 @@ def prefetch_related_objects(result_cache, related_lookups): obj_list = [obj for obj in obj_list if obj is not None] def prefetch_one_level(instances, relmanager, attname): def get_prefetcher(instance, attr): """ For the attribute 'attr' on the given instance, finds an object that has a get_prefetch_query_set(). Return a 3 tuple containing: (the object with get_prefetch_query_set (or None), a boolean that is False if the attribute was not found at all, a boolean that is True if the attribute has already been fetched) """ prefetcher = None attr_found = False is_fetched = False # For singly related objects, we have to avoid getting the attribute # from the object, as this will trigger the query. So we first try # on the class, in order to get the descriptor object. 
rel_obj_descriptor = getattr(instance.__class__, attr, None) if rel_obj_descriptor is None: try: rel_obj = getattr(instance, attr) attr_found = True except AttributeError: pass else: attr_found = True if rel_obj_descriptor: # singly related object, descriptor object has the # get_prefetch_query_set() method. if hasattr(rel_obj_descriptor, 'get_prefetch_query_set'): prefetcher = rel_obj_descriptor if rel_obj_descriptor.is_cached(instance): is_fetched = True else: # descriptor doesn't support prefetching, so we go ahead and get # the attribute on the instance rather than the class to # support many related managers rel_obj = getattr(instance, attr) if hasattr(rel_obj, 'get_prefetch_query_set'): prefetcher = rel_obj return prefetcher, attr_found, is_fetched def prefetch_one_level(instances, prefetcher, attname): """ Helper function for prefetch_related_objects Runs prefetches on all instances using the manager relmanager, assigning results to queryset against instance.attname. Runs prefetches on all instances using the prefetcher object, assigning results to relevant caches in instance. The prefetched objects are returned, along with any additional prefetches that must be done due to prefetch_related lookups found from default managers. """ rel_qs, rel_obj_attr, instance_attr = relmanager.get_prefetch_query_set(instances) # prefetcher must have a method get_prefetch_query_set() which takes a list # of instances, and returns a tuple: # (queryset of instances of self.model that are related to passed in instances, # callable that gets value to be matched for returned instances, # callable that gets value to be matched for passed in instances, # boolean that is True for singly related objects, # cache name to assign to). # The 'values to be matched' must be hashable as they will be used # in a dictionary. 
rel_qs, rel_obj_attr, instance_attr, single, cache_name =\ prefetcher.get_prefetch_query_set(instances) # We have to handle the possibility that the default manager itself added # prefetch_related lookups to the QuerySet we just got back. We don't want to # trigger the prefetch_related functionality by evaluating the query. Loading @@ -1676,17 +1737,25 @@ def prefetch_one_level(instances, relmanager, attname): rel_obj_cache = {} for rel_obj in all_related_objects: rel_attr_val = getattr(rel_obj, rel_obj_attr) rel_attr_val = rel_obj_attr(rel_obj) if rel_attr_val not in rel_obj_cache: rel_obj_cache[rel_attr_val] = [] rel_obj_cache[rel_attr_val].append(rel_obj) for obj in instances: instance_attr_val = instance_attr(obj) vals = rel_obj_cache.get(instance_attr_val, []) if single: # Need to assign to single cache on instance if vals: setattr(obj, cache_name, vals[0]) else: # Multi, attribute represents a manager with an .all() method that # returns a QuerySet qs = getattr(obj, attname).all() instance_attr_val = getattr(obj, instance_attr) qs._result_cache = rel_obj_cache.get(instance_attr_val, []) qs._result_cache = vals # We don't want the individual qs doing prefetch_related now, since we # have merged this into the current work. qs._prefetch_done = True obj._prefetched_objects_cache[attname] = qs obj._prefetched_objects_cache[cache_name] = qs return all_related_objects, additional_prl docs/ref/models/querysets.txt +57 −32 Original line number Diff line number Diff line Loading @@ -696,14 +696,26 @@ prefetch_related .. versionadded:: 1.4 Returns a ``QuerySet`` that will automatically retrieve, in a single batch, related many-to-many and many-to-one objects for each of the specified lookups. 
This is similar to ``select_related`` for the 'many related objects' case, but note that ``prefetch_related`` causes a separate query to be issued for each set of related objects that you request, unlike ``select_related`` which modifies the original query with joins in order to get the related objects. With ``prefetch_related``, the additional queries are done as soon as the QuerySet begins to be evaluated. related objects for each of the specified lookups. This has a similar purpose to ``select_related``, in that both are designed to stop the deluge of database queries that is caused by accessing related objects, but the strategy is quite different. ``select_related`` works by creating a SQL join and including the fields of the related object in the SELECT statement. For this reason, ``select_related`` gets the related objects in the same database query. However, to avoid the much larger result set that would result from joining across a 'many' relationship, ``select_related`` is limited to single-valued relationships - foreign key and one-to-one. ``prefetch_related``, on the other hand, does a separate lookup for each relationship, and does the 'joining' in Python. This allows it to prefetch many-to-many and many-to-one objects, which cannot be done using ``select_related``, in addition to the foreign key and one-to-one relationships that are supported by ``select_related``. It also supports prefetching of :class:`~django.contrib.contenttypes.generic.GenericRelation` and :class:`~django.contrib.contenttypes.generic.GenericForeignKey`. For example, suppose you have these models:: Loading Loading @@ -733,14 +745,17 @@ All the relevant toppings will be fetched in a single query, and used to make ``QuerySets`` that have a pre-filled cache of the relevant results. These ``QuerySets`` are then used in the ``self.toppings.all()`` calls. 
Please note that use of ``prefetch_related`` will mean that the additional queries run will **always** be executed - even if you never use the related objects - and it always fully populates the result cache on the primary ``QuerySet`` (which can sometimes be avoided in other cases). The additional queries are executed after the QuerySet has begun to be evaluated and the primary query has been executed. Note that the result cache of the primary QuerySet and all specified related objects will then be fully loaded into memory, which is often avoided in other cases - even after a query has been executed in the database, QuerySet normally tries to make use of chunking when reading from the database to avoid loading all objects into memory before you need them. Also remember that, as always with QuerySets, any subsequent chained methods will ignore previously cached results, and retrieve data using a fresh database query. So, if you write the following: which imply a different database query will ignore previously cached results, and retrieve data using a fresh database query. So, if you write the following: >>> pizzas = Pizza.objects.prefetch_related('toppings') >>> [list(pizza.toppings.filter(spicy=True)) for pizza in pizzas] Loading @@ -749,12 +764,6 @@ query. So, if you write the following: you - in fact it hurts performance, since you have done a database query that you haven't used. So use this feature with caution! The lookups that must be supplied to this method can be any attributes on the model instances which represent related queries that return multiple objects. This includes attributes representing the 'many' side of ``ForeignKey`` relationships, forward and reverse ``ManyToManyField`` attributes, and also any ``GenericRelations``. You can also use the normal join syntax to do related fields of related fields.
Suppose we have an additional model to the example above:: Loading @@ -770,24 +779,40 @@ This will prefetch all pizzas belonging to restaurants, and all toppings belonging to those pizzas. This will result in a total of 3 database queries - one for the restaurants, one for the pizzas, and one for the toppings. >>> Restaurant.objects.select_related('best_pizza').prefetch_related('best_pizza__toppings') >>> Restaurant.objects.prefetch_related('best_pizza__toppings') This will fetch the best pizza and all the toppings for the best pizza for each restaurant. This will be done in 2 database queries - one for the restaurants and 'best pizzas' combined (achieved through use of ``select_related``), and one for the toppings. restaurant. This will be done in 3 database queries - one for the restaurants, one for the 'best pizzas', and one for the toppings. Of course, the ``best_pizza`` relationship could also be fetched using ``select_related`` to reduce the query count to 2: >>> Restaurant.objects.select_related('best_pizza').prefetch_related('best_pizza__toppings') Since the prefetch is executed after the main query (which includes the joins needed by ``select_related``), it is able to detect that the ``best_pizza`` objects have already been fetched, and it will skip fetching them again. Chaining ``prefetch_related`` calls will accumulate the fields that should have this behavior applied. To clear any ``prefetch_related`` behavior, pass `None` as a parameter:: Chaining ``prefetch_related`` calls will accumulate the lookups that are prefetched. To clear any ``prefetch_related`` behavior, pass `None` as a parameter:: >>> non_prefetched = qs.prefetch_related(None) One difference when using ``prefetch_related`` is that, in some circumstances, objects created by a query can be shared between the different objects that they are related to i.e. a single Python model instance can appear at more than one point in the tree of objects that are returned.
Normally this behavior will not be a problem, and will in fact save both memory and CPU time. One difference to note when using ``prefetch_related`` is that objects created by a query can be shared between the different objects that they are related to i.e. a single Python model instance can appear at more than one point in the tree of objects that are returned. This will normally happen with foreign key relationships. Typically this behavior will not be a problem, and will in fact save both memory and CPU time. While ``prefetch_related`` supports prefetching ``GenericForeignKey`` relationships, the number of queries will depend on the data. Since a ``GenericForeignKey`` can reference data in multiple tables, one query per table referenced is needed, rather than one query for all the items. There could be additional queries on the ``ContentType`` table if the relevant rows have not already been fetched. extra ~~~~~ Loading Loading
django/contrib/contenttypes/generic.py +51 −1 Original line number Diff line number Diff line Loading @@ -2,7 +2,10 @@ Classes allowing "generic" relations through ContentType and object-id fields. """ from collections import defaultdict from functools import partial from operator import attrgetter from django.core.exceptions import ObjectDoesNotExist from django.db import connection from django.db.models import signals Loading Loading @@ -59,6 +62,49 @@ class GenericForeignKey(object): # This should never happen. I love comments like this, don't you? raise Exception("Impossible arguments to GFK.get_content_type!") def get_prefetch_query_set(self, instances): # For efficiency, group the instances by content type and then do one # query per model fk_dict = defaultdict(list) # We need one instance for each group in order to get the right db: instance_dict = {} ct_attname = self.model._meta.get_field(self.ct_field).get_attname() for instance in instances: # We avoid looking for values if either ct_id or fkey value is None ct_id = getattr(instance, ct_attname) if ct_id is not None: fk_val = getattr(instance, self.fk_field) if fk_val is not None: fk_dict[ct_id].append(fk_val) instance_dict[ct_id] = instance ret_val = [] for ct_id, fkeys in fk_dict.items(): instance = instance_dict[ct_id] ct = self.get_content_type(id=ct_id, using=instance._state.db) ret_val.extend(ct.get_all_objects_for_this_type(pk__in=fkeys)) # For doing the join in Python, we have to match both the FK val and the # content type, so the 'attr' vals we return need to be callables that # will return a (fk, class) pair. 
def gfk_key(obj): ct_id = getattr(obj, ct_attname) if ct_id is None: return None else: return (getattr(obj, self.fk_field), self.get_content_type(id=ct_id, using=obj._state.db).model_class()) return (ret_val, lambda obj: (obj._get_pk_val(), obj.__class__), gfk_key, True, self.cache_attr) def is_cached(self, instance): return hasattr(instance, self.cache_attr) def __get__(self, instance, instance_type=None): if instance is None: return self Loading Loading @@ -282,7 +328,11 @@ def create_generic_related_manager(superclass): [obj._get_pk_val() for obj in instances] } qs = super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**query) return (qs, self.object_id_field_name, 'pk') return (qs, attrgetter(self.object_id_field_name), lambda obj: obj._get_pk_val(), False, self.prefetch_cache_name) def add(self, *objs): for obj in objs: Loading
django/contrib/contenttypes/models.py +6 −0 Original line number Diff line number Diff line Loading @@ -113,5 +113,11 @@ class ContentType(models.Model): """ return self.model_class()._base_manager.using(self._state.db).get(**kwargs) def get_all_objects_for_this_type(self, **kwargs): """ Returns all objects of this type for the keyword arguments given. """ return self.model_class()._base_manager.using(self._state.db).filter(**kwargs) def natural_key(self): return (self.app_label, self.model)
django/db/models/fields/related.py +60 −26 Original line number Diff line number Diff line from operator import attrgetter from django.db import connection, router from django.db.backends import util from django.db.models import signals, get_model Loading Loading @@ -227,6 +229,22 @@ class SingleRelatedObjectDescriptor(object): self.related = related self.cache_name = related.get_cache_name() def is_cached(self, instance): return hasattr(instance, self.cache_name) def get_query_set(self, **db_hints): db = router.db_for_read(self.related.model, **db_hints) return self.related.model._base_manager.using(db) def get_prefetch_query_set(self, instances): vals = [instance._get_pk_val() for instance in instances] params = {'%s__pk__in' % self.related.field.name: vals} return (self.get_query_set(), attrgetter(self.related.field.attname), lambda obj: obj._get_pk_val(), True, self.cache_name) def __get__(self, instance, instance_type=None): if instance is None: return self Loading @@ -234,8 +252,7 @@ class SingleRelatedObjectDescriptor(object): return getattr(instance, self.cache_name) except AttributeError: params = {'%s__pk' % self.related.field.name: instance._get_pk_val()} db = router.db_for_read(self.related.model, instance=instance) rel_obj = self.related.model._base_manager.using(db).get(**params) rel_obj = self.get_query_set(instance=instance).get(**params) setattr(instance, self.cache_name, rel_obj) return rel_obj Loading Loading @@ -283,14 +300,40 @@ class ReverseSingleRelatedObjectDescriptor(object): # ReverseSingleRelatedObjectDescriptor instance. def __init__(self, field_with_rel): self.field = field_with_rel self.cache_name = self.field.get_cache_name() def is_cached(self, instance): return hasattr(instance, self.cache_name) def get_query_set(self, **db_hints): db = router.db_for_read(self.field.rel.to, **db_hints) rel_mgr = self.field.rel.to._default_manager # If the related manager indicates that it should be used for # related fields, respect that. 
if getattr(rel_mgr, 'use_for_related_fields', False): return rel_mgr.using(db) else: return QuerySet(self.field.rel.to).using(db) def get_prefetch_query_set(self, instances): vals = [getattr(instance, self.field.attname) for instance in instances] other_field = self.field.rel.get_related_field() if other_field.rel: params = {'%s__pk__in' % self.field.rel.field_name: vals} else: params = {'%s__in' % self.field.rel.field_name: vals} return (self.get_query_set().filter(**params), attrgetter(self.field.rel.field_name), attrgetter(self.field.attname), True, self.cache_name) def __get__(self, instance, instance_type=None): if instance is None: return self cache_name = self.field.get_cache_name() try: return getattr(instance, cache_name) return getattr(instance, self.cache_name) except AttributeError: val = getattr(instance, self.field.attname) if val is None: Loading @@ -303,16 +346,9 @@ class ReverseSingleRelatedObjectDescriptor(object): params = {'%s__pk' % self.field.rel.field_name: val} else: params = {'%s__exact' % self.field.rel.field_name: val} # If the related manager indicates that it should be used for # related fields, respect that. rel_mgr = self.field.rel.to._default_manager db = router.db_for_read(self.field.rel.to, instance=instance) if getattr(rel_mgr, 'use_for_related_fields', False): rel_obj = rel_mgr.using(db).get(**params) else: rel_obj = QuerySet(self.field.rel.to).using(db).get(**params) setattr(instance, cache_name, rel_obj) qs = self.get_query_set(instance=instance) rel_obj = qs.get(**params) setattr(instance, self.cache_name, rel_obj) return rel_obj def __set__(self, instance, value): Loading Loading @@ -425,15 +461,15 @@ class ForeignRelatedObjectsDescriptor(object): return super(RelatedManager, self).get_query_set().using(db).filter(**self.core_filters) def get_prefetch_query_set(self, instances): """ Return a queryset that does the bulk lookup needed by prefetch_related functionality. 
""" db = self._db or router.db_for_read(self.model) query = {'%s__%s__in' % (rel_field.name, attname): [getattr(obj, attname) for obj in instances]} qs = super(RelatedManager, self).get_query_set().using(db).filter(**query) return (qs, rel_field.get_attname(), attname) return (qs, attrgetter(rel_field.get_attname()), attrgetter(attname), False, rel_field.related_query_name()) def add(self, *objs): for obj in objs: Loading Loading @@ -507,12 +543,6 @@ def create_many_related_manager(superclass, rel): return super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**self.core_filters) def get_prefetch_query_set(self, instances): """ Returns a tuple: (queryset of instances of self.model that are related to passed in instances attr of returned instances needed for matching attr of passed in instances needed for matching) """ from django.db import connections db = self._db or router.db_for_read(self.model) query = {'%s__pk__in' % self.query_field_name: Loading @@ -534,7 +564,11 @@ def create_many_related_manager(superclass, rel): qs = qs.extra(select={'_prefetch_related_val': '%s.%s' % (qn(join_table), qn(source_col))}) select_attname = fk.rel.get_related_field().get_attname() return (qs, '_prefetch_related_val', select_attname) return (qs, attrgetter('_prefetch_related_val'), attrgetter(select_attname), False, self.prefetch_cache_name) # If the ManyToMany relation has an intermediary model, # the add and remove methods do not exist. Loading
django/db/models/query.py +97 −28 Original line number Diff line number Diff line Loading @@ -1612,36 +1612,42 @@ def prefetch_related_objects(result_cache, related_lookups): break # Descend down tree try: rel_obj = getattr(obj_list[0], attr) except AttributeError: # We assume that objects retrieved are homogenous (which is the premise # of prefetch_related), so what applies to first object applies to all. first_obj = obj_list[0] prefetcher, attr_found, is_fetched = get_prefetcher(first_obj, attr) if not attr_found: raise AttributeError("Cannot find '%s' on %s object, '%s' is an invalid " "parameter to prefetch_related()" % (attr, obj_list[0].__class__.__name__, lookup)) (attr, first_obj.__class__.__name__, lookup)) can_prefetch = hasattr(rel_obj, 'get_prefetch_query_set') if level == len(attrs) - 1 and not can_prefetch: # Last one, this *must* resolve to a related manager. raise ValueError("'%s' does not resolve to a supported 'many related" " manager' for model %s - this is an invalid" " parameter to prefetch_related()." % (lookup, model.__name__)) if level == len(attrs) - 1 and prefetcher is None: # Last one, this *must* resolve to something that supports # prefetching, otherwise there is no point adding it and the # developer asking for it has made a mistake. raise ValueError("'%s' does not resolve to a item that supports " "prefetching - this is an invalid parameter to " "prefetch_related()." 
% lookup) if can_prefetch: if prefetcher is not None and not is_fetched: # Check we didn't do this already current_lookup = LOOKUP_SEP.join(attrs[0:level+1]) if current_lookup in done_queries: obj_list = done_queries[current_lookup] else: relmanager = rel_obj obj_list, additional_prl = prefetch_one_level(obj_list, relmanager, attr) obj_list, additional_prl = prefetch_one_level(obj_list, prefetcher, attr) for f in additional_prl: new_prl = LOOKUP_SEP.join([current_lookup, f]) related_lookups.append(new_prl) done_queries[current_lookup] = obj_list else: # Assume we've got some singly related object. We replace # the current list of parent objects with that list. # Either a singly related object that has already been fetched # (e.g. via select_related), or hopefully some other property # that doesn't support prefetching but needs to be traversed. # We replace the current list of parent objects with that list. obj_list = [getattr(obj, attr) for obj in obj_list] # Filter out 'None' so that we can continue with nullable Loading @@ -1649,18 +1655,73 @@ def prefetch_related_objects(result_cache, related_lookups): obj_list = [obj for obj in obj_list if obj is not None] def prefetch_one_level(instances, relmanager, attname): def get_prefetcher(instance, attr): """ For the attribute 'attr' on the given instance, finds an object that has a get_prefetch_query_set(). Return a 3 tuple containing: (the object with get_prefetch_query_set (or None), a boolean that is False if the attribute was not found at all, a boolean that is True if the attribute has already been fetched) """ prefetcher = None attr_found = False is_fetched = False # For singly related objects, we have to avoid getting the attribute # from the object, as this will trigger the query. So we first try # on the class, in order to get the descriptor object. 
rel_obj_descriptor = getattr(instance.__class__, attr, None) if rel_obj_descriptor is None: try: rel_obj = getattr(instance, attr) attr_found = True except AttributeError: pass else: attr_found = True if rel_obj_descriptor: # singly related object, descriptor object has the # get_prefetch_query_set() method. if hasattr(rel_obj_descriptor, 'get_prefetch_query_set'): prefetcher = rel_obj_descriptor if rel_obj_descriptor.is_cached(instance): is_fetched = True else: # descriptor doesn't support prefetching, so we go ahead and get # the attribute on the instance rather than the class to # support many related managers rel_obj = getattr(instance, attr) if hasattr(rel_obj, 'get_prefetch_query_set'): prefetcher = rel_obj return prefetcher, attr_found, is_fetched def prefetch_one_level(instances, prefetcher, attname): """ Helper function for prefetch_related_objects Runs prefetches on all instances using the manager relmanager, assigning results to queryset against instance.attname. Runs prefetches on all instances using the prefetcher object, assigning results to relevant caches in instance. The prefetched objects are returned, along with any additional prefetches that must be done due to prefetch_related lookups found from default managers. """ rel_qs, rel_obj_attr, instance_attr = relmanager.get_prefetch_query_set(instances) # prefetcher must have a method get_prefetch_query_set() which takes a list # of instances, and returns a tuple: # (queryset of instances of self.model that are related to passed in instances, # callable that gets value to be matched for returned instances, # callable that gets value to be matched for passed in instances, # boolean that is True for singly related objects, # cache name to assign to). # The 'values to be matched' must be hashable as they will be used # in a dictionary. 
rel_qs, rel_obj_attr, instance_attr, single, cache_name =\ prefetcher.get_prefetch_query_set(instances) # We have to handle the possibility that the default manager itself added # prefetch_related lookups to the QuerySet we just got back. We don't want to # trigger the prefetch_related functionality by evaluating the query. Loading @@ -1676,17 +1737,25 @@ def prefetch_one_level(instances, relmanager, attname): rel_obj_cache = {} for rel_obj in all_related_objects: rel_attr_val = getattr(rel_obj, rel_obj_attr) rel_attr_val = rel_obj_attr(rel_obj) if rel_attr_val not in rel_obj_cache: rel_obj_cache[rel_attr_val] = [] rel_obj_cache[rel_attr_val].append(rel_obj) for obj in instances: instance_attr_val = instance_attr(obj) vals = rel_obj_cache.get(instance_attr_val, []) if single: # Need to assign to single cache on instance if vals: setattr(obj, cache_name, vals[0]) else: # Multi, attribute represents a manager with an .all() method that # returns a QuerySet qs = getattr(obj, attname).all() instance_attr_val = getattr(obj, instance_attr) qs._result_cache = rel_obj_cache.get(instance_attr_val, []) qs._result_cache = vals # We don't want the individual qs doing prefetch_related now, since we # have merged this into the current work. qs._prefetch_done = True obj._prefetched_objects_cache[attname] = qs obj._prefetched_objects_cache[cache_name] = qs return all_related_objects, additional_prl
docs/ref/models/querysets.txt +57 −32 Original line number Diff line number Diff line Loading @@ -696,14 +696,26 @@ prefetch_related .. versionadded:: 1.4 Returns a ``QuerySet`` that will automatically retrieve, in a single batch, related many-to-many and many-to-one objects for each of the specified lookups. This is similar to ``select_related`` for the 'many related objects' case, but note that ``prefetch_related`` causes a separate query to be issued for each set of related objects that you request, unlike ``select_related`` which modifies the original query with joins in order to get the related objects. With ``prefetch_related``, the additional queries are done as soon as the QuerySet begins to be evaluated. related objects for each of the specified lookups. This has a similar purpose to ``select_related``, in that both are designed to stop the deluge of database queries that is caused by accessing related objects, but the strategy is quite different. ``select_related`` works by creating a SQL join and including the fields of the related object in the SELECT statement. For this reason, ``select_related`` gets the related objects in the same database query. However, to avoid the much larger result set that would result from joining across a 'many' relationship, ``select_related`` is limited to single-valued relationships - foreign key and one-to-one. ``prefetch_related``, on the other hand, does a separate lookup for each relationship, and does the 'joining' in Python. This allows it to prefetch many-to-many and many-to-one objects, which cannot be done using ``select_related``, in addition to the foreign key and one-to-one relationships that are supported by ``select_related``. It also supports prefetching of :class:`~django.contrib.contenttypes.generic.GenericRelation` and :class:`~django.contrib.contenttypes.generic.GenericForeignKey`. 
For example, suppose you have these models:: Loading Loading @@ -733,14 +745,17 @@ All the relevant toppings will be fetched in a single query, and used to make ``QuerySets`` that have a pre-filled cache of the relevant results. These ``QuerySets`` are then used in the ``self.toppings.all()`` calls. Please note that use of ``prefetch_related`` will mean that the additional queries run will **always** be executed - even if you never use the related objects - and it always fully populates the result cache on the primary ``QuerySet`` (which can sometimes be avoided in other cases). The additional queries are executed after the QuerySet has begun to be evaluated and the primary query has been executed. Note that the result cache of the primary QuerySet and all specified related objects will then be fully loaded into memory, which is often avoided in other cases - even after a query has been executed in the database, QuerySet normally tries to make use of chunking when reading from the database to avoid loading all objects into memory before you need them. Also remember that, as always with QuerySets, any subsequent chained methods will ignore previously cached results, and retrieve data using a fresh database query. So, if you write the following: which imply a different database query will ignore previously cached results, and retrieve data using a fresh database query. So, if you write the following: >>> pizzas = Pizza.objects.prefetch_related('toppings') >>> [list(pizza.toppings.filter(spicy=True)) for pizza in pizzas] Loading @@ -749,12 +764,6 @@ query. So, if you write the following: you - in fact it hurts performance, since you have done a database query that you haven't used. So use this feature with caution! The lookups that must be supplied to this method can be any attributes on the model instances which represent related queries that return multiple objects. 
This includes attributes representing the 'many' side of ``ForeignKey`` relationships, forward and reverse ``ManyToManyField`` attributes, and also any ``GenericRelations``. You can also use the normal join syntax to do related fields of related fields. Suppose we have an additional model to the example above:: Loading @@ -770,24 +779,40 @@ This will prefetch all pizzas belonging to restaurants, and all toppings belonging to those pizzas. This will result in a total of 3 database queries - one for the restaurants, one for the pizzas, and one for the toppings. >>> Restaurant.objects.select_related('best_pizza').prefetch_related('best_pizza__toppings') >>> Restaurant.objects.prefetch_related('best_pizza__toppings') This will fetch the best pizza and all the toppings for the best pizza for each restaurant. This will be done in 2 database queries - one for the restaurants and 'best pizzas' combined (achieved through use of ``select_related``), and one for the toppings. restaurant. This will be done in 3 database queries - one for the restaurants, one for the 'best pizzas', and one for the toppings. Of course, the ``best_pizza`` relationship could also be fetched using ``select_related`` to reduce the query count to 2: >>> Restaurant.objects.select_related('best_pizza').prefetch_related('best_pizza__toppings') Since the prefetch is executed after the main query (which includes the joins needed by ``select_related``), it is able to detect that the ``best_pizza`` objects have already been fetched, and it will skip fetching them again. Chaining ``prefetch_related`` calls will accumulate the fields that should have this behavior applied. To clear any ``prefetch_related`` behavior, pass `None` as a parameter:: Chaining ``prefetch_related`` calls will accumulate the lookups that are prefetched. 
To clear any ``prefetch_related`` behavior, pass `None` as a parameter:: >>> non_prefetched = qs.prefetch_related(None) One difference when using ``prefetch_related`` is that, in some circumstances, objects created by a query can be shared between the different objects that they are related to i.e. a single Python model instance can appear at more than one point in the tree of objects that are returned. Normally this behavior will not be a problem, and will in fact save both memory and CPU time. One difference to note when using ``prefetch_related`` is that objects created by a query can be shared between the different objects that they are related to i.e. a single Python model instance can appear at more than one point in the tree of objects that are returned. This will normally happen with foreign key relationships. Typically this behavior will not be a problem, and will in fact save both memory and CPU time. While ``prefetch_related`` supports prefetching ``GenericForeignKey`` relationships, the number of queries will depend on the data. Since a ``GenericForeignKey`` can reference data in multiple tables, one query per table referenced is needed, rather than one query for all the items. There could be additional queries on the ``ContentType`` table if the relevant rows have not already been fetched. extra ~~~~~ Loading