Ticket #6422: distinct_on.3.diff

File distinct_on.3.diff, 12.1 KB (added by Taylor Mitchell, 9 years ago)
  • AUTHORS

    diff --git a/AUTHORS b/AUTHORS
    index 31a3300..c484692 100644
    a b answer newbie questions, and generally made Django that much better: 
    540540    Gasper Zejn <zejn@kiberpipa.org>
    541541    Jarek Zgoda <jarek.zgoda@gmail.com>
    542542    Cheng Zhang
     543    Jeffrey Gelens <jeffrey@gelens.org>
    543544
    544545A big THANK YOU goes to:
    545546
  • django/db/backends/__init__.py

    diff --git a/django/db/backends/__init__.py b/django/db/backends/__init__.py
    index 1c3bc7e..df371bd 100644
    a b class BaseDatabaseFeatures(object): 
    341341    supports_stddev = None
    342342    can_introspect_foreign_keys = None
    343343
     344    # Support for the DISTINCT ON clause
     345    can_distinct_on_fields = False
     346
    344347    def __init__(self, connection):
    345348        self.connection = connection
    346349
    class BaseDatabaseOperations(object): 
    494497        """
    495498        raise NotImplementedError('Full-text search is not implemented for this database backend')
    496499
     500    def distinct(self, db_table, fields):
     501        """
     502        Returns an SQL DISTINCT clause which removes duplicate rows from the
     503        result set. If any fields are given, only the given fields are being
     504        checked for duplicates.
     505        """
     506        if fields:
     507            raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
     508        else:
     509            return 'DISTINCT'
     510
    497511    def last_executed_query(self, cursor, sql, params):
    498512        """
    499513        Returns a string of the query last executed by the given cursor, with
  • django/db/backends/postgresql_psycopg2/base.py

    diff --git a/django/db/backends/postgresql_psycopg2/base.py b/django/db/backends/postgresql_psycopg2/base.py
    index 6ed59a6..ec75bae 100644
    a b class DatabaseFeatures(BaseDatabaseFeatures): 
    7171    can_defer_constraint_checks = True
    7272    has_select_for_update = True
    7373    has_select_for_update_nowait = True
     74    can_distinct_on_fields = True
    7475
    7576
    7677class DatabaseWrapper(BaseDatabaseWrapper):
  • django/db/backends/postgresql_psycopg2/operations.py

    diff --git a/django/db/backends/postgresql_psycopg2/operations.py b/django/db/backends/postgresql_psycopg2/operations.py
    index d535ee3..6bc9bd3 100644
    a b class DatabaseOperations(BaseDatabaseOperations): 
    173173
    174174        return 63
    175175
     176    def distinct(self, db_table, fields):
     177        if fields:
     178            table_name = self.quote_name(db_table)
     179            fields = [table_name + "." + self.quote_name(field) for field in fields]
     180            return 'DISTINCT ON (%s)' % ', '.join(fields)
     181        else:
     182            return 'DISTINCT'
     183
    176184    def last_executed_query(self, cursor, sql, params):
    177185        # http://initd.org/psycopg/docs/cursor.html#cursor.query
    178186        # The query attribute is a Psycopg extension to the DB API 2.0.
  • django/db/models/query.py

    diff --git a/django/db/models/query.py b/django/db/models/query.py
    index af62061..9a36310 100644
    a b class QuerySet(object): 
    665665        obj.query.add_ordering(*field_names)
    666666        return obj
    667667
    668     def distinct(self, true_or_false=True):
     668    def distinct(self, *field_names):
    669669        """
    670670        Returns a new QuerySet instance that will select only distinct results.
    671671        """
    672672        obj = self._clone()
    673         obj.query.distinct = true_or_false
     673        obj.query.add_distinct_fields(field_names)
     674        obj.query.distinct = True
     675
    674676        return obj
    675677
    676678    def extra(self, select=None, where=None, params=None, tables=None,
    class EmptyQuerySet(QuerySet): 
    10901092        """
    10911093        return self
    10921094
    1093     def distinct(self, true_or_false=True):
     1095    def distinct(self, fields=None):
    10941096        """
    10951097        Always returns EmptyQuerySet.
    10961098        """
  • django/db/models/sql/compiler.py

    diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py
    index 05c19f3..fac3695 100644
    a b class SQLCompiler(object): 
    7474            params.extend(val[1])
    7575
    7676        result = ['SELECT']
     77
    7778        if self.query.distinct:
    78             result.append('DISTINCT')
     79            distinct_sql = self.connection.ops.distinct(
     80                self.query.model._meta.db_table, self.query.distinct_fields)
     81            result.append(distinct_sql)
     82
    7983        result.append(', '.join(out_cols + self.query.ordering_aliases))
    8084
    8185        result.append('FROM')
  • django/db/models/sql/query.py

    diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py
    index 110e317..bc0c7f7 100644
    a b class Query(object): 
    125125        self.order_by = []
    126126        self.low_mark, self.high_mark = 0, None  # Used for offset/limit
    127127        self.distinct = False
     128        self.distinct_fields = None
    128129        self.select_for_update = False
    129130        self.select_for_update_nowait = False
    130131        self.select_related = False
    class Query(object): 
    256257        obj.order_by = self.order_by[:]
    257258        obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
    258259        obj.distinct = self.distinct
     260        obj.distinct_fields = self.distinct_fields
    259261        obj.select_for_update = self.select_for_update
    260262        obj.select_for_update_nowait = self.select_for_update_nowait
    261263        obj.select_related = self.select_related
    class Query(object): 
    384386        Performs a COUNT() query using the current filter constraints.
    385387        """
    386388        obj = self.clone()
    387         if len(self.select) > 1 or self.aggregate_select:
     389        if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields):
    388390            # If a select clause exists, then the query has already started to
    389391            # specify the columns that are to be returned.
    390392            # In this case, we need to use a subquery to evaluate the count.
    class Query(object): 
    15571559        self.select = []
    15581560        self.select_fields = []
    15591561
     1562    def add_distinct_fields(self, field_names):
     1563        self.distinct_fields = []
     1564        options = self.get_meta()
     1565
     1566        for name in field_names:
     1567            field, source, opts, join_list, last, _ = self.setup_joins(
     1568                name.split(LOOKUP_SEP), options, self.get_initial_alias(), False)
     1569            self.distinct_fields.append(field.column)
     1570
    15601571    def add_fields(self, field_names, allow_m2m=True):
    15611572        """
    15621573        Adds the given (model) fields to the select set. The field names are
  • docs/ref/models/querysets.txt

    diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
    index a1bd5cc..857db2f 100644
    a b undefined afterward). 
    345345distinct
    346346~~~~~~~~
    347347
    348 .. method:: distinct()
     348.. method:: distinct(*fields)
    349349
    350350Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This
    351351eliminates duplicate rows from the query results.
    query spans multiple tables, it's possible to get duplicate results when a 
    375375    ``values()`` together, be careful when ordering by fields not in the
    376376    ``values()`` call.
    377377
     378.. versionadded:: 1.4
     379   ``distinct()`` accepts optional positional arguments ``*fields`` naming the
     380   fields that are compared when eliminating duplicate rows. This translates
     381   to a ``SELECT DISTINCT ON`` SQL query, which is only supported on PostgreSQL;
     382   other database backends raise ``NotImplementedError``.
     383
     384.. note::
     385    When ``*fields`` are given, you must also call :meth:`order_by` with those
     386    same field names as its leftmost arguments.
     387
    378388values
    379389~~~~~~
    380390
  • tests/regressiontests/queries/models.py

    diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py
    index d1e5e6e..9cf3a09 100644
    a b class Celebrity(models.Model): 
    208208    name = models.CharField("Name", max_length=20)
    209209    greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
    210210
     211    def __unicode__(self):
     212        return self.name
     213
    211214class TvChef(Celebrity):
    212215    pass
    213216
    class ObjectC(models.Model): 
    317320
    318321    def __unicode__(self):
    319322       return self.name
     323
     324
     325class Staff(models.Model):
     326    name = models.CharField(max_length=50)
     327    organisation = models.CharField(max_length=100)
     328    tags = models.ManyToManyField(Tag, through='StaffTag')
     329
     330    def __unicode__(self):
     331        return self.name
     332
     333class StaffTag(models.Model):
     334    staff = models.ForeignKey(Staff)
     335    tag = models.ForeignKey(Tag)
     336
     337    def __unicode__(self):
     338        return u"%s -> %s" % (self.tag, self.staff)
     339
  • tests/regressiontests/queries/tests.py

    diff --git a/tests/regressiontests/queries/tests.py b/tests/regressiontests/queries/tests.py
    index 31856ba..4903a22 100644
    a b from models import (Annotation, Article, Author, Celebrity, Child, Cover, Detail 
    1515    DumbCategory, ExtraInfo, Fan, Item, LeafA, LoopX, LoopZ, ManagedModel,
    1616    Member, NamedCategory, Note, Number, Plaything, PointerA, Ranking, Related,
    1717    Report, ReservedName, Tag, TvChef, Valid, X, Food, Eaten, Node, ObjectA, ObjectB,
    18     ObjectC)
     18    ObjectC, Staff, StaffTag)
    1919
    2020
    2121class BaseQuerysetTest(TestCase):
    class ConditionalTests(BaseQuerysetTest): 
    16061606        t4 = Tag.objects.create(name='t4', parent=t3)
    16071607        t5 = Tag.objects.create(name='t5', parent=t3)
    16081608
     1609        p1_o1 = Staff.objects.create(name="p1", organisation="o1")
     1610        p2_o1 = Staff.objects.create(name="p2", organisation="o1")
     1611        p3_o1 = Staff.objects.create(name="p3", organisation="o1")
     1612        p1_o2 = Staff.objects.create(name="p1", organisation="o2")
     1613
     1614        StaffTag.objects.create(staff=p1_o1, tag=t1)
     1615        StaffTag.objects.create(staff=p1_o1, tag=t1)
     1616
     1617        celeb1 = Celebrity.objects.create(name="c1")
     1618        celeb2 = Celebrity.objects.create(name="c2")
     1619
     1620        self.fan1 = Fan.objects.create(fan_of=celeb1)
     1621        self.fan2 = Fan.objects.create(fan_of=celeb1)
     1622        self.fan3 = Fan.objects.create(fan_of=celeb2)
     1623
    16091624    # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed
    16101625    # (Python issue 1242657), so these cases return an empty list, rather than
    16111626    # raising an exception. Not a lot we can do about that, unfortunately, due to
    class ConditionalTests(BaseQuerysetTest): 
    16771692            2500
    16781693        )
    16791694
     1695    @skipUnlessDBFeature('can_distinct_on_fields')
     1696    def test_ticket6422(self):
     1697        # (qset, expected) tuples
     1698        qsets = (
     1699            (
     1700                Staff.objects.distinct().order_by('name'),
     1701                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     1702            ),
     1703            (
     1704                Staff.objects.distinct('name').order_by('name'),
     1705                ['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     1706            ),
     1707            (
     1708                Staff.objects.distinct('organisation').order_by('organisation', 'name'),
     1709                ['<Staff: p1>', '<Staff: p1>'],
     1710            ),
     1711            (
     1712                Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'),
     1713                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     1714            ),
     1715            (
     1716                Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\
     1717                    distinct('name').order_by('name'),
     1718                ['<Celebrity: c1>', '<Celebrity: c2>'],
     1719            ),
     1720            (
     1721                StaffTag.objects.distinct('staff','tag'),
     1722                ['<StaffTag: t1 -> p1>'],
     1723            ),
     1724            (
     1725                Tag.objects.order_by('parent__pk').distinct('parent'),
     1726                ['<Tag: t3>', '<Tag: t5>', '<Tag: t1>'],
     1727            )
     1728        )
     1729
     1730        for qset, expected in qsets:
     1731            self.assertQuerysetEqual(qset, expected)
     1732            self.assertEqual(qset.count(), len(expected))
     1733
     1734        # and check the fieldlookup
     1735        self.assertRaises(
     1736            FieldError,
     1737            lambda: Staff.objects.distinct('shrubbery')
     1738        )
     1739
     1740
    16801741class UnionTests(unittest.TestCase):
    16811742    """
    16821743    Tests for the union of two querysets. Bug #12252.
Back to Top