Commit cafb2669 authored by Anssi Kääriäinen's avatar Anssi Kääriäinen
Browse files

Fixed #17144 -- MySQL again groups by PK only

Thanks to Christian Oudard for the report and tests.
parent b1ac329b
Loading
Loading
Loading
Loading
+35 −31
Original line number Diff line number Diff line
@@ -103,21 +103,12 @@ class SQLCompiler(object):
            result.append('WHERE %s' % where)
            params.extend(w_params)

        grouping, gb_params = self.get_grouping()
        grouping, gb_params = self.get_grouping(ordering_group_by)
        if grouping:
            if distinct_fields:
                raise NotImplementedError(
                    "annotate() + distinct(fields) not implemented.")
            if ordering:
                # If the backend can't group by PK (i.e., any database
                # other than MySQL), then any fields mentioned in the
                # ordering clause needs to be in the group by clause.
                if not self.connection.features.allows_group_by_pk:
                    for col, col_params in ordering_group_by:
                        if col not in grouping:
                            grouping.append(str(col))
                            gb_params.extend(col_params)
            else:
            if not ordering:
                ordering = self.connection.ops.force_no_ordering()
            result.append('GROUP BY %s' % ', '.join(grouping))
            params.extend(gb_params)
@@ -378,7 +369,7 @@ class SQLCompiler(object):
                else:
                    order = asc
                result.append('%s %s' % (field, order))
                group_by.append((field, []))
                group_by.append((str(field), []))
                continue
            col, order = get_order_dir(field, asc)
            if col in self.query.aggregate_select:
@@ -538,39 +529,52 @@ class SQLCompiler(object):
                first = False
        return result, []

    def get_grouping(self):
    def get_grouping(self, ordering_group_by):
        """
        Returns a tuple representing the SQL elements in the "group by" clause.
        """
        qn = self.quote_name_unless_alias
        result, params = [], []
        if self.query.group_by is not None:
            if (len(self.query.model._meta.fields) == len(self.query.select) and
                self.connection.features.allows_group_by_pk):
            select_cols = self.query.select + self.query.related_select_cols
            # Just the column, not the fields.
            select_cols = [s[0] for s in select_cols]
            if (len(self.query.model._meta.fields) == len(self.query.select)
                    and self.connection.features.allows_group_by_pk):
                self.query.group_by = [
                    (self.query.model._meta.db_table, self.query.model._meta.pk.column)
                ]

            group_by = self.query.group_by or []

            extra_selects = []
            for extra_select, extra_params in six.itervalues(self.query.extra_select):
                extra_selects.append(extra_select)
                params.extend(extra_params)
            select_cols = [s.col for s in self.query.select]
            related_select_cols = [s.col for s in self.query.related_select_cols]
            cols = (group_by + select_cols + related_select_cols + extra_selects)
                select_cols = []
            seen = set()
            cols = self.query.group_by + select_cols
            for col in cols:
                if col in seen:
                    continue
                seen.add(col)
                if isinstance(col, (list, tuple)):
                    result.append('%s.%s' % (qn(col[0]), qn(col[1])))
                    sql = '%s.%s' % (qn(col[0]), qn(col[1]))
                elif hasattr(col, 'as_sql'):
                    result.append(col.as_sql(qn, self.connection))
                    sql = col.as_sql(qn, self.connection)
                else:
                    result.append('(%s)' % str(col))
                    sql = '(%s)' % str(col)
                if sql not in seen:
                    result.append(sql)
                    seen.add(sql)

            # Still, we need to add all stuff in ordering (except if the backend can
            # group by just by PK).
            if ordering_group_by and not self.connection.features.allows_group_by_pk:
                for order, order_params in ordering_group_by:
                    # Even if we have seen the same SQL string, it might have
                    # different params, so, we add same SQL in "has params" case.
                    if order not in seen or params:
                        result.append(order)
                        params.extend(order_params)
                        seen.add(order)

            # Unconditionally add the extra_select items.
            for extra_select, extra_params in self.query.extra_select.values():
                sql = '(%s)' % str(extra_select)
                result.append(sql)
                params.extend(extra_params)

        return result, params

    def fill_related_selections(self, opts=None, root_alias=None, cur_depth=1,
+90 −1
Original line number Diff line number Diff line
@@ -470,7 +470,7 @@ class AggregationTests(TestCase):
        # Regression for #15709 - Ensure each group_by field only exists once
        # per query
        qs = Book.objects.values('publisher').annotate(max_pages=Max('pages')).order_by()
        grouping, gb_params = qs.query.get_compiler(qs.db).get_grouping()
        grouping, gb_params = qs.query.get_compiler(qs.db).get_grouping([])
        self.assertEqual(len(grouping), 1)

    def test_duplicate_alias(self):
@@ -889,3 +889,92 @@ class AggregationTests(TestCase):
        self.assertIs(qs.query.alias_map['aggregation_regress_book'].join_type, None)
        # Check that the query executes without problems.
        self.assertEqual(len(qs.exclude(publisher=-1)), 6)

    @skipUnlessDBFeature("allows_group_by_pk")
    def test_aggregate_duplicate_columns(self):
        # Regression test for #17144

        results = Author.objects.annotate(num_contacts=Count('book_contact_set'))

        # There should only be one GROUP BY clause, for the `id` column.
        # `name` and `age` should not be grouped on.
        grouping, gb_params = results.query.get_compiler(using='default').get_grouping([])
        self.assertEqual(len(grouping), 1)
        assert 'id' in grouping[0]
        assert 'name' not in grouping[0]
        assert 'age' not in grouping[0]

        # The query group_by property should also only show the `id`.
        self.assertEqual(results.query.group_by, [('aggregation_regress_author', 'id')])

        # Ensure that we get correct results.
        self.assertEqual(
            [(a.name, a.num_contacts) for a in results.order_by('name')],
            [
                ('Adrian Holovaty', 1),
                ('Brad Dayley', 1),
                ('Jacob Kaplan-Moss', 0),
                ('James Bennett', 1),
                ('Jeffrey Forcier', 1),
                ('Paul Bissex', 0),
                ('Peter Norvig', 2),
                ('Stuart Russell', 0),
                ('Wesley J. Chun', 0),
            ]
        )

    @skipUnlessDBFeature("allows_group_by_pk")
    def test_aggregate_duplicate_columns_only(self):
        # Works with only() too.
        results = Author.objects.only('id', 'name').annotate(num_contacts=Count('book_contact_set'))
        grouping, gb_params = results.query.get_compiler(using='default').get_grouping([])
        self.assertEqual(len(grouping), 1)
        assert 'id' in grouping[0]
        assert 'name' not in grouping[0]
        assert 'age' not in grouping[0]

        # The query group_by property should also only show the `id`.
        self.assertEqual(results.query.group_by, [('aggregation_regress_author', 'id')])

        # Ensure that we get correct results.
        self.assertEqual(
            [(a.name, a.num_contacts) for a in results.order_by('name')],
            [
                ('Adrian Holovaty', 1),
                ('Brad Dayley', 1),
                ('Jacob Kaplan-Moss', 0),
                ('James Bennett', 1),
                ('Jeffrey Forcier', 1),
                ('Paul Bissex', 0),
                ('Peter Norvig', 2),
                ('Stuart Russell', 0),
                ('Wesley J. Chun', 0),
            ]
        )

    @skipUnlessDBFeature("allows_group_by_pk")
    def test_aggregate_duplicate_columns_select_related(self):
        # And select_related()
        results = Book.objects.select_related('contact').annotate(
            num_authors=Count('authors'))
        grouping, gb_params = results.query.get_compiler(using='default').get_grouping([])
        self.assertEqual(len(grouping), 1)
        assert 'id' in grouping[0]
        assert 'name' not in grouping[0]
        assert 'contact' not in grouping[0]

        # The query group_by property should also only show the `id`.
        self.assertEqual(results.query.group_by, [('aggregation_regress_book', 'id')])

        # Ensure that we get correct results.
        self.assertEqual(
            [(b.name, b.num_authors) for b in results.order_by('name')],
            [
                ('Artificial Intelligence: A Modern Approach', 2),
                ('Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 1),
                ('Practical Django Projects', 1),
                ('Python Web Development with Django', 3),
                ('Sams Teach Yourself Django in 24 Hours', 1),
                ('The Definitive Guide to Django: Web Development Done Right', 2)
            ]
        )