Loading django/db/models/query.py +29 −120 Original line number Diff line number Diff line Loading @@ -20,11 +20,6 @@ from django.utils.functional import partition from django.utils import six from django.utils import timezone # Used to control how many objects are worked with at once in some cases (e.g. # when deleting objects). CHUNK_SIZE = 100 ITER_CHUNK_SIZE = CHUNK_SIZE # The maximum number of items to display in a QuerySet.__repr__ REPR_OUTPUT_SIZE = 20 Loading @@ -41,7 +36,6 @@ class QuerySet(object): self._db = using self.query = query or sql.Query(self.model) self._result_cache = None self._iter = None self._sticky_filter = False self._for_write = False self._prefetch_related_lookups = [] Loading @@ -58,7 +52,7 @@ class QuerySet(object): """ obj = self.__class__() for k, v in self.__dict__.items(): if k in ('_iter','_result_cache'): if k == '_result_cache': obj.__dict__[k] = None else: obj.__dict__[k] = copy.deepcopy(v, memo) Loading @@ -69,10 +63,8 @@ class QuerySet(object): Allows the QuerySet to be pickled. """ # Force the cache to be fully populated. len(self) self._fetch_all() obj_dict = self.__dict__.copy() obj_dict['_iter'] = None return obj_dict def __repr__(self): Loading @@ -82,95 +74,31 @@ class QuerySet(object): return repr(data) def __len__(self): # Since __len__ is called quite frequently (for example, as part of # list(qs), we make some effort here to be as efficient as possible # whilst not messing up any existing iterators against the QuerySet. if self._result_cache is None: if self._iter: self._result_cache = list(self._iter) else: self._result_cache = list(self.iterator()) elif self._iter: self._result_cache.extend(self._iter) if self._prefetch_related_lookups and not self._prefetch_done: self._prefetch_related_objects() self._fetch_all() return len(self._result_cache) def __iter__(self): if self._prefetch_related_lookups and not self._prefetch_done: # We need all the results in order to be able to do the prefetch # in one go. 
To minimize code duplication, we use the __len__ # code path which also forces this, and also does the prefetch len(self) if self._result_cache is None: self._iter = self.iterator() self._result_cache = [] if self._iter: return self._result_iter() # Python's list iterator is better than our version when we're just # iterating over the cache. """ The queryset iterator protocol uses three nested iterators in the default case: 1. sql.compiler:execute_sql() - Returns 100 rows at time (constants.GET_ITERATOR_CHUNK_SIZE) using cursor.fetchmany(). This part is responsible for doing some column masking, and returning the rows in chunks. 2. sql/compiler.results_iter() - Returns one row at time. At this point the rows are still just tuples. In some cases the return values are converted to Python values at this location (see resolve_columns(), resolve_aggregate()). 3. self.iterator() - Responsible for turning the rows into model objects. """ self._fetch_all() return iter(self._result_cache) def _result_iter(self): pos = 0 while 1: upper = len(self._result_cache) while pos < upper: yield self._result_cache[pos] pos = pos + 1 if not self._iter: raise StopIteration if len(self._result_cache) <= pos: self._fill_cache() def __bool__(self): if self._prefetch_related_lookups and not self._prefetch_done: # We need all the results in order to be able to do the prefetch # in one go. To minimize code duplication, we use the __len__ # code path which also forces this, and also does the prefetch len(self) if self._result_cache is not None: def __nonzero__(self): self._fetch_all() return bool(self._result_cache) try: next(iter(self)) except StopIteration: return False return True def __nonzero__(self): # Python 2 compatibility return type(self).__bool__(self) def __contains__(self, val): # The 'in' operator works without this method, due to __iter__. This # implementation exists only to shortcut the creation of Model # instances, by bailing out early if we find a matching element. 
pos = 0 if self._result_cache is not None: if val in self._result_cache: return True elif self._iter is None: # iterator is exhausted, so we have our answer return False # remember not to check these again: pos = len(self._result_cache) else: # We need to start filling the result cache out. The following # ensures that self._iter is not None and self._result_cache is not # None it = iter(self) # Carry on, one result at a time. while True: if len(self._result_cache) <= pos: self._fill_cache(num=1) if self._iter is None: # we ran out of items return False if self._result_cache[pos] == val: return True pos += 1 def __getitem__(self, k): """ Loading @@ -184,19 +112,6 @@ class QuerySet(object): "Negative indexing is not supported." if self._result_cache is not None: if self._iter is not None: # The result cache has only been partially populated, so we may # need to fill it out a bit more. if isinstance(k, slice): if k.stop is not None: # Some people insist on passing in strings here. bound = int(k.stop) else: bound = None else: bound = k + 1 if len(self._result_cache) < bound: self._fill_cache(bound - len(self._result_cache)) return self._result_cache[k] if isinstance(k, slice): Loading Loading @@ -370,7 +285,7 @@ class QuerySet(object): If the QuerySet is already fully cached this simply returns the length of the cached results set to avoid multiple SELECT COUNT(*) calls. """ if self._result_cache is not None and not self._iter: if self._result_cache is not None: return len(self._result_cache) return self.query.get_count(using=self.db) Loading Loading @@ -933,17 +848,11 @@ class QuerySet(object): c._setup_query() return c def _fill_cache(self, num=None): """ Fills the result cache with 'num' more entries (or until the results iterator is exhausted). 
""" if self._iter: try: for i in range(num or ITER_CHUNK_SIZE): self._result_cache.append(next(self._iter)) except StopIteration: self._iter = None def _fetch_all(self): if self._result_cache is None: self._result_cache = list(self.iterator()) if self._prefetch_related_lookups and not self._prefetch_done: self._prefetch_related_objects() def _next_is_sticky(self): """ Loading docs/releases/1.6.txt +19 −0 Original line number Diff line number Diff line Loading @@ -524,6 +524,25 @@ non-standard behavior has been preserved but moved to the model form field layer and occurs only when the associated widget is :class:`~django.forms.SelectMultiple` or a subclass. QuerySet iteration ~~~~~~~~~~~~~~~~~~ The ``QuerySet`` iteration was changed to immediately convert all fetched rows to ``Model`` objects. In Django 1.5 and earlier the fetched rows were converted to ``Model`` objects in chunks of 100. Existing code will work, but the amount of rows converted to objects might change in certain use cases. Such usages include partially looping over a queryset or any usage which ends up doing ``__bool__`` or ``__contains__``. Notably most database backends did fetch all the rows in one go already in 1.5. It is still possible to convert the fetched rows to ``Model`` objects lazily by using the :meth:`~django.db.models.query.QuerySet.iterator()` method. 
Miscellaneous ~~~~~~~~~~~~~ Loading tests/queries/tests.py +8 −36 Original line number Diff line number Diff line Loading @@ -9,7 +9,6 @@ from django.conf import settings from django.core.exceptions import FieldError from django.db import DatabaseError, connection, connections, DEFAULT_DB_ALIAS from django.db.models import Count, F, Q from django.db.models.query import ITER_CHUNK_SIZE from django.db.models.sql.where import WhereNode, EverythingNode, NothingNode from django.db.models.sql.datastructures import EmptyResultSet from django.test import TestCase, skipUnlessDBFeature Loading Loading @@ -1211,16 +1210,6 @@ class Queries2Tests(TestCase): ordered=False ) def test_ticket7411(self): # Saving to db must work even with partially read result set in another # cursor. for num in range(2 * ITER_CHUNK_SIZE + 1): _ = Number.objects.create(num=num) for i, obj in enumerate(Number.objects.all()): obj.save() if i > 10: break def test_ticket7759(self): # Count should work with a partially read result set. count = Number.objects.count() Loading Loading @@ -1700,31 +1689,6 @@ class Queries6Tests(TestCase): ann1.notes.add(n1) ann2 = Annotation.objects.create(name='a2', tag=t4) # This next test used to cause really weird PostgreSQL behavior, but it was # only apparent much later when the full test suite ran. # - Yeah, it leaves global ITER_CHUNK_SIZE to 2 instead of 100... #@unittest.expectedFailure def test_slicing_and_cache_interaction(self): # We can do slicing beyond what is currently in the result cache, # too. # We need to mess with the implementation internals a bit here to decrease the # cache fill size so that we don't read all the results at once. from django.db.models import query query.ITER_CHUNK_SIZE = 2 qs = Tag.objects.all() # Fill the cache with the first chunk. self.assertTrue(bool(qs)) self.assertEqual(len(qs._result_cache), 2) # Query beyond the end of the cache and check that it is filled out as required. 
self.assertEqual(repr(qs[4]), '<Tag: t5>') self.assertEqual(len(qs._result_cache), 5) # But querying beyond the end of the result set will fail. self.assertRaises(IndexError, lambda: qs[100]) def test_parallel_iterators(self): # Test that parallel iterators work. qs = Tag.objects.all() Loading Loading @@ -2533,6 +2497,14 @@ class WhereNodeTest(TestCase): w = WhereNode(children=[empty_w, NothingNode()], connector='OR') self.assertRaises(EmptyResultSet, w.as_sql, qn, connection) class IteratorExceptionsTest(TestCase): def test_iter_exceptions(self): qs = ExtraInfo.objects.only('author') with self.assertRaises(AttributeError): list(qs) class NullJoinPromotionOrTest(TestCase): def setUp(self): self.d1 = ModelD.objects.create(name='foo') Loading Loading
django/db/models/query.py +29 −120 Original line number Diff line number Diff line Loading @@ -20,11 +20,6 @@ from django.utils.functional import partition from django.utils import six from django.utils import timezone # Used to control how many objects are worked with at once in some cases (e.g. # when deleting objects). CHUNK_SIZE = 100 ITER_CHUNK_SIZE = CHUNK_SIZE # The maximum number of items to display in a QuerySet.__repr__ REPR_OUTPUT_SIZE = 20 Loading @@ -41,7 +36,6 @@ class QuerySet(object): self._db = using self.query = query or sql.Query(self.model) self._result_cache = None self._iter = None self._sticky_filter = False self._for_write = False self._prefetch_related_lookups = [] Loading @@ -58,7 +52,7 @@ class QuerySet(object): """ obj = self.__class__() for k, v in self.__dict__.items(): if k in ('_iter','_result_cache'): if k == '_result_cache': obj.__dict__[k] = None else: obj.__dict__[k] = copy.deepcopy(v, memo) Loading @@ -69,10 +63,8 @@ class QuerySet(object): Allows the QuerySet to be pickled. """ # Force the cache to be fully populated. len(self) self._fetch_all() obj_dict = self.__dict__.copy() obj_dict['_iter'] = None return obj_dict def __repr__(self): Loading @@ -82,95 +74,31 @@ class QuerySet(object): return repr(data) def __len__(self): # Since __len__ is called quite frequently (for example, as part of # list(qs), we make some effort here to be as efficient as possible # whilst not messing up any existing iterators against the QuerySet. if self._result_cache is None: if self._iter: self._result_cache = list(self._iter) else: self._result_cache = list(self.iterator()) elif self._iter: self._result_cache.extend(self._iter) if self._prefetch_related_lookups and not self._prefetch_done: self._prefetch_related_objects() self._fetch_all() return len(self._result_cache) def __iter__(self): if self._prefetch_related_lookups and not self._prefetch_done: # We need all the results in order to be able to do the prefetch # in one go. 
To minimize code duplication, we use the __len__ # code path which also forces this, and also does the prefetch len(self) if self._result_cache is None: self._iter = self.iterator() self._result_cache = [] if self._iter: return self._result_iter() # Python's list iterator is better than our version when we're just # iterating over the cache. """ The queryset iterator protocol uses three nested iterators in the default case: 1. sql.compiler:execute_sql() - Returns 100 rows at time (constants.GET_ITERATOR_CHUNK_SIZE) using cursor.fetchmany(). This part is responsible for doing some column masking, and returning the rows in chunks. 2. sql/compiler.results_iter() - Returns one row at time. At this point the rows are still just tuples. In some cases the return values are converted to Python values at this location (see resolve_columns(), resolve_aggregate()). 3. self.iterator() - Responsible for turning the rows into model objects. """ self._fetch_all() return iter(self._result_cache) def _result_iter(self): pos = 0 while 1: upper = len(self._result_cache) while pos < upper: yield self._result_cache[pos] pos = pos + 1 if not self._iter: raise StopIteration if len(self._result_cache) <= pos: self._fill_cache() def __bool__(self): if self._prefetch_related_lookups and not self._prefetch_done: # We need all the results in order to be able to do the prefetch # in one go. To minimize code duplication, we use the __len__ # code path which also forces this, and also does the prefetch len(self) if self._result_cache is not None: def __nonzero__(self): self._fetch_all() return bool(self._result_cache) try: next(iter(self)) except StopIteration: return False return True def __nonzero__(self): # Python 2 compatibility return type(self).__bool__(self) def __contains__(self, val): # The 'in' operator works without this method, due to __iter__. This # implementation exists only to shortcut the creation of Model # instances, by bailing out early if we find a matching element. 
pos = 0 if self._result_cache is not None: if val in self._result_cache: return True elif self._iter is None: # iterator is exhausted, so we have our answer return False # remember not to check these again: pos = len(self._result_cache) else: # We need to start filling the result cache out. The following # ensures that self._iter is not None and self._result_cache is not # None it = iter(self) # Carry on, one result at a time. while True: if len(self._result_cache) <= pos: self._fill_cache(num=1) if self._iter is None: # we ran out of items return False if self._result_cache[pos] == val: return True pos += 1 def __getitem__(self, k): """ Loading @@ -184,19 +112,6 @@ class QuerySet(object): "Negative indexing is not supported." if self._result_cache is not None: if self._iter is not None: # The result cache has only been partially populated, so we may # need to fill it out a bit more. if isinstance(k, slice): if k.stop is not None: # Some people insist on passing in strings here. bound = int(k.stop) else: bound = None else: bound = k + 1 if len(self._result_cache) < bound: self._fill_cache(bound - len(self._result_cache)) return self._result_cache[k] if isinstance(k, slice): Loading Loading @@ -370,7 +285,7 @@ class QuerySet(object): If the QuerySet is already fully cached this simply returns the length of the cached results set to avoid multiple SELECT COUNT(*) calls. """ if self._result_cache is not None and not self._iter: if self._result_cache is not None: return len(self._result_cache) return self.query.get_count(using=self.db) Loading Loading @@ -933,17 +848,11 @@ class QuerySet(object): c._setup_query() return c def _fill_cache(self, num=None): """ Fills the result cache with 'num' more entries (or until the results iterator is exhausted). 
""" if self._iter: try: for i in range(num or ITER_CHUNK_SIZE): self._result_cache.append(next(self._iter)) except StopIteration: self._iter = None def _fetch_all(self): if self._result_cache is None: self._result_cache = list(self.iterator()) if self._prefetch_related_lookups and not self._prefetch_done: self._prefetch_related_objects() def _next_is_sticky(self): """ Loading
docs/releases/1.6.txt +19 −0 Original line number Diff line number Diff line Loading @@ -524,6 +524,25 @@ non-standard behavior has been preserved but moved to the model form field layer and occurs only when the associated widget is :class:`~django.forms.SelectMultiple` or a subclass. QuerySet iteration ~~~~~~~~~~~~~~~~~~ ``QuerySet`` iteration was changed to immediately convert all fetched rows to ``Model`` objects. In Django 1.5 and earlier, the fetched rows were converted to ``Model`` objects in chunks of 100. Existing code will work, but the number of rows converted to objects might change in certain use cases. Such usages include partially looping over a queryset or any usage which ends up calling ``__bool__`` or ``__contains__``. Notably, most database backends already fetched all the rows in one go in 1.5. It is still possible to convert the fetched rows to ``Model`` objects lazily by using the :meth:`~django.db.models.query.QuerySet.iterator()` method. Miscellaneous ~~~~~~~~~~~~~ Loading
tests/queries/tests.py +8 −36 Original line number Diff line number Diff line Loading @@ -9,7 +9,6 @@ from django.conf import settings from django.core.exceptions import FieldError from django.db import DatabaseError, connection, connections, DEFAULT_DB_ALIAS from django.db.models import Count, F, Q from django.db.models.query import ITER_CHUNK_SIZE from django.db.models.sql.where import WhereNode, EverythingNode, NothingNode from django.db.models.sql.datastructures import EmptyResultSet from django.test import TestCase, skipUnlessDBFeature Loading Loading @@ -1211,16 +1210,6 @@ class Queries2Tests(TestCase): ordered=False ) def test_ticket7411(self): # Saving to db must work even with partially read result set in another # cursor. for num in range(2 * ITER_CHUNK_SIZE + 1): _ = Number.objects.create(num=num) for i, obj in enumerate(Number.objects.all()): obj.save() if i > 10: break def test_ticket7759(self): # Count should work with a partially read result set. count = Number.objects.count() Loading Loading @@ -1700,31 +1689,6 @@ class Queries6Tests(TestCase): ann1.notes.add(n1) ann2 = Annotation.objects.create(name='a2', tag=t4) # This next test used to cause really weird PostgreSQL behavior, but it was # only apparent much later when the full test suite ran. # - Yeah, it leaves global ITER_CHUNK_SIZE to 2 instead of 100... #@unittest.expectedFailure def test_slicing_and_cache_interaction(self): # We can do slicing beyond what is currently in the result cache, # too. # We need to mess with the implementation internals a bit here to decrease the # cache fill size so that we don't read all the results at once. from django.db.models import query query.ITER_CHUNK_SIZE = 2 qs = Tag.objects.all() # Fill the cache with the first chunk. self.assertTrue(bool(qs)) self.assertEqual(len(qs._result_cache), 2) # Query beyond the end of the cache and check that it is filled out as required. 
self.assertEqual(repr(qs[4]), '<Tag: t5>') self.assertEqual(len(qs._result_cache), 5) # But querying beyond the end of the result set will fail. self.assertRaises(IndexError, lambda: qs[100]) def test_parallel_iterators(self): # Test that parallel iterators work. qs = Tag.objects.all() Loading Loading @@ -2533,6 +2497,14 @@ class WhereNodeTest(TestCase): w = WhereNode(children=[empty_w, NothingNode()], connector='OR') self.assertRaises(EmptyResultSet, w.as_sql, qn, connection) class IteratorExceptionsTest(TestCase): def test_iter_exceptions(self): qs = ExtraInfo.objects.only('author') with self.assertRaises(AttributeError): list(qs) class NullJoinPromotionOrTest(TestCase): def setUp(self): self.d1 = ModelD.objects.create(name='foo') Loading