Fixed #17144 -- MySQL again groups by PK only (cafb2669) · Commits · Dom Sekotill / django

django/db/models/sql/compiler.py

+35 −31

Original line number	Diff line number	Diff line
		@@ -103,21 +103,12 @@ class SQLCompiler(object):
		result.append('WHERE %s' % where)
		params.extend(w_params)

		grouping, gb_params = self.get_grouping()
		grouping, gb_params = self.get_grouping(ordering_group_by)
		if grouping:
		if distinct_fields:
		raise NotImplementedError(
		"annotate() + distinct(fields) not implemented.")
		if ordering:
		# If the backend can't group by PK (i.e., any database
		# other than MySQL), then any fields mentioned in the
		# ordering clause needs to be in the group by clause.
		if not self.connection.features.allows_group_by_pk:
		for col, col_params in ordering_group_by:
		if col not in grouping:
		grouping.append(str(col))
		gb_params.extend(col_params)
		else:
		if not ordering:
		ordering = self.connection.ops.force_no_ordering()
		result.append('GROUP BY %s' % ', '.join(grouping))
		params.extend(gb_params)
		@@ -378,7 +369,7 @@ class SQLCompiler(object):
		else:
		order = asc
		result.append('%s %s' % (field, order))
		group_by.append((field, []))
		group_by.append((str(field), []))
		continue
		col, order = get_order_dir(field, asc)
		if col in self.query.aggregate_select:
		@@ -538,39 +529,52 @@ class SQLCompiler(object):
		first = False
		return result, []

		def get_grouping(self):
		def get_grouping(self, ordering_group_by):
		"""
		Returns a tuple representing the SQL elements in the "group by" clause.
		"""
		qn = self.quote_name_unless_alias
		result, params = [], []
		if self.query.group_by is not None:
		if (len(self.query.model._meta.fields) == len(self.query.select) and
		self.connection.features.allows_group_by_pk):
		select_cols = self.query.select + self.query.related_select_cols
		# Just the column, not the fields.
		select_cols = [s[0] for s in select_cols]
		if (len(self.query.model._meta.fields) == len(self.query.select)
		and self.connection.features.allows_group_by_pk):
		self.query.group_by = [
		(self.query.model._meta.db_table, self.query.model._meta.pk.column)
		]

		group_by = self.query.group_by or []

		extra_selects = []
		for extra_select, extra_params in six.itervalues(self.query.extra_select):
		extra_selects.append(extra_select)
		params.extend(extra_params)
		select_cols = [s.col for s in self.query.select]
		related_select_cols = [s.col for s in self.query.related_select_cols]
		cols = (group_by + select_cols + related_select_cols + extra_selects)
		select_cols = []
		seen = set()
		cols = self.query.group_by + select_cols
		for col in cols:
		if col in seen:
		continue
		seen.add(col)
		if isinstance(col, (list, tuple)):
		result.append('%s.%s' % (qn(col[0]), qn(col[1])))
		sql = '%s.%s' % (qn(col[0]), qn(col[1]))
		elif hasattr(col, 'as_sql'):
		result.append(col.as_sql(qn, self.connection))
		sql = col.as_sql(qn, self.connection)
		else:
		result.append('(%s)' % str(col))
		sql = '(%s)' % str(col)
		if sql not in seen:
		result.append(sql)
		seen.add(sql)

		# Still, we need to add all stuff in ordering (except if the backend can
		# group by just by PK).
		if ordering_group_by and not self.connection.features.allows_group_by_pk:
		for order, order_params in ordering_group_by:
		# Even if we have seen the same SQL string, it might have
		# different params, so, we add same SQL in "has params" case.
		if order not in seen or params:
		result.append(order)
		params.extend(order_params)
		seen.add(order)

		# Unconditionally add the extra_select items.
		for extra_select, extra_params in self.query.extra_select.values():
		sql = '(%s)' % str(extra_select)
		result.append(sql)
		params.extend(extra_params)

		return result, params

		def fill_related_selections(self, opts=None, root_alias=None, cur_depth=1,

tests/regressiontests/aggregation_regress/tests.py

+90 −1

Original line number	Diff line number	Diff line
		@@ -470,7 +470,7 @@ class AggregationTests(TestCase):
		# Regression for #15709 - Ensure each group_by field only exists once
		# per query
		qs = Book.objects.values('publisher').annotate(max_pages=Max('pages')).order_by()
		grouping, gb_params = qs.query.get_compiler(qs.db).get_grouping()
		grouping, gb_params = qs.query.get_compiler(qs.db).get_grouping([])
		self.assertEqual(len(grouping), 1)

		def test_duplicate_alias(self):
		@@ -889,3 +889,92 @@ class AggregationTests(TestCase):
		self.assertIs(qs.query.alias_map['aggregation_regress_book'].join_type, None)
		# Check that the query executes without problems.
		self.assertEqual(len(qs.exclude(publisher=-1)), 6)

		@skipUnlessDBFeature("allows_group_by_pk")
		def test_aggregate_duplicate_columns(self):
		# Regression test for #17144

		results = Author.objects.annotate(num_contacts=Count('book_contact_set'))

		# There should only be one GROUP BY clause, for the `id` column.
		# `name` and `age` should not be grouped on.
		grouping, gb_params = results.query.get_compiler(using='default').get_grouping([])
		self.assertEqual(len(grouping), 1)
		assert 'id' in grouping[0]
		assert 'name' not in grouping[0]
		assert 'age' not in grouping[0]

		# The query group_by property should also only show the `id`.
		self.assertEqual(results.query.group_by, [('aggregation_regress_author', 'id')])

		# Ensure that we get correct results.
		self.assertEqual(
		[(a.name, a.num_contacts) for a in results.order_by('name')],
		[
		('Adrian Holovaty', 1),
		('Brad Dayley', 1),
		('Jacob Kaplan-Moss', 0),
		('James Bennett', 1),
		('Jeffrey Forcier', 1),
		('Paul Bissex', 0),
		('Peter Norvig', 2),
		('Stuart Russell', 0),
		('Wesley J. Chun', 0),
		]
		)

		@skipUnlessDBFeature("allows_group_by_pk")
		def test_aggregate_duplicate_columns_only(self):
		# Works with only() too.
		results = Author.objects.only('id', 'name').annotate(num_contacts=Count('book_contact_set'))
		grouping, gb_params = results.query.get_compiler(using='default').get_grouping([])
		self.assertEqual(len(grouping), 1)
		assert 'id' in grouping[0]
		assert 'name' not in grouping[0]
		assert 'age' not in grouping[0]

		# The query group_by property should also only show the `id`.
		self.assertEqual(results.query.group_by, [('aggregation_regress_author', 'id')])

		# Ensure that we get correct results.
		self.assertEqual(
		[(a.name, a.num_contacts) for a in results.order_by('name')],
		[
		('Adrian Holovaty', 1),
		('Brad Dayley', 1),
		('Jacob Kaplan-Moss', 0),
		('James Bennett', 1),
		('Jeffrey Forcier', 1),
		('Paul Bissex', 0),
		('Peter Norvig', 2),
		('Stuart Russell', 0),
		('Wesley J. Chun', 0),
		]
		)

		@skipUnlessDBFeature("allows_group_by_pk")
		def test_aggregate_duplicate_columns_select_related(self):
		# And select_related()
		results = Book.objects.select_related('contact').annotate(
		num_authors=Count('authors'))
		grouping, gb_params = results.query.get_compiler(using='default').get_grouping([])
		self.assertEqual(len(grouping), 1)
		assert 'id' in grouping[0]
		assert 'name' not in grouping[0]
		assert 'contact' not in grouping[0]

		# The query group_by property should also only show the `id`.
		self.assertEqual(results.query.group_by, [('aggregation_regress_book', 'id')])

		# Ensure that we get correct results.
		self.assertEqual(
		[(b.name, b.num_authors) for b in results.order_by('name')],
		[
		('Artificial Intelligence: A Modern Approach', 2),
		('Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 1),
		('Practical Django Projects', 1),
		('Python Web Development with Django', 3),
		('Sams Teach Yourself Django in 24 Hours', 1),
		('The Definitive Guide to Django: Web Development Done Right', 2)
		]
		)