Ticket #6422: distinct_on.10.diff

File distinct_on.10.diff, 21.4 KB (added by Jeffrey Gelens, 8 years ago)

Fixes the behavior when distinct() is called multiple times on the same queryset.

  • AUTHORS

    diff --git a/AUTHORS b/AUTHORS
    a b  
    198198    Vincent Foley <vfoleybourgon@yahoo.ca>
    199199    Alcides Fonseca
    200200    Rudolph Froger <rfroger@estrate.nl>
    201201    Jorge Gajon <gajon@gajon.org>
    202202    gandalf@owca.info
    203203    Marc Garcia <marc.garcia@accopensys.com>
    204204    Andy Gayton <andy-django@thecablelounge.com>
    205205    geber@datacollect.com
     206    Jeffrey Gelens <jeffrey@gelens.org>
    206207    Baishampayan Ghose
    207208    Joshua Ginsberg <jag@flowtheory.net>
    208209    Dimitris Glezos <dimitris@glezos.com>
    209210    glin@seznam.cz
    210211    martin.glueck@gmail.com
    211212    Artyom Gnilov <boobsd@gmail.com>
    212213    Ben Godfrey <http://aftnn.org>
    213214    GomoX <gomo@datafull.com>
  • django/db/backends/__init__.py

    diff --git a/django/db/backends/__init__.py b/django/db/backends/__init__.py
    a b  
    372372
    373373    # Features that need to be confirmed at runtime
    374374    # Cache whether the confirmation has been performed.
    375375    _confirmed = False
    376376    supports_transactions = None
    377377    supports_stddev = None
    378378    can_introspect_foreign_keys = None
    379379
     380    # Support for the DISTINCT ON clause
     381    can_distinct_on_fields = False
     382
    380383    def __init__(self, connection):
    381384        self.connection = connection
    382385
    383386    def confirm(self):
    384387        "Perform manual checks of any database features that might vary between installs"
    385388        self._confirmed = True
    386389        self.supports_transactions = self._supports_transactions()
    387390        self.supports_stddev = self._supports_stddev()
     
    525528    def fulltext_search_sql(self, field_name):
    526529        """
    527530        Returns the SQL WHERE clause to use in order to perform a full-text
    528531        search of the given field_name. Note that the resulting string should
    529532        contain a '%s' placeholder for the value being searched against.
    530533        """
    531534        raise NotImplementedError('Full-text search is not implemented for this database backend')
    532535
     536    def distinct(self, fields):
     537        """
     538        Returns an SQL DISTINCT clause which removes duplicate rows from the
     539        result set. If any fields are given, only those fields are
     540        compared when checking for duplicates.
     541        """
     542        if fields:
     543            raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
     544        else:
     545            return 'DISTINCT'
     546
    533547    def last_executed_query(self, cursor, sql, params):
    534548        """
    535549        Returns a string of the query last executed by the given cursor, with
    536550        placeholders replaced with actual values.
    537551
    538552        `sql` is the raw query containing placeholders, and `params` is the
    539553        sequence of parameters. These are used by default, but this method
    540554        exists for database backends to provide a better implementation
  • django/db/backends/postgresql_psycopg2/base.py

    diff --git a/django/db/backends/postgresql_psycopg2/base.py b/django/db/backends/postgresql_psycopg2/base.py
    a b  
    7777    can_return_id_from_insert = True
    7878    requires_rollback_on_dirty_transaction = True
    7979    has_real_datatype = True
    8080    can_defer_constraint_checks = True
    8181    has_select_for_update = True
    8282    has_select_for_update_nowait = True
    8383    has_bulk_insert = True
    8484    supports_tablespaces = True
     85    can_distinct_on_fields = True
    8586
    8687class DatabaseWrapper(BaseDatabaseWrapper):
    8788    vendor = 'postgresql'
    8889    operators = {
    8990        'exact': '= %s',
    9091        'iexact': '= UPPER(%s)',
    9192        'contains': 'LIKE %s',
    9293        'icontains': 'LIKE UPPER(%s)',
  • django/db/backends/postgresql_psycopg2/operations.py

    diff --git a/django/db/backends/postgresql_psycopg2/operations.py b/django/db/backends/postgresql_psycopg2/operations.py
    a b  
    174174        macro in src/include/pg_config_manual.h .
    175175
    176176        This implementation simply returns 63, but can easily be overridden by a
    177177        custom database backend that inherits most of its behavior from this one.
    178178        """
    179179
    180180        return 63
    181181
     182    def distinct(self, fields):
     183        if fields:
     184            fields_sql = []
     185
     186            for field in fields:
     187                fields_sql.append(
     188                    self.quote_name(field.model._meta.db_table) + "." + \
     189                    self.quote_name(field.column)
     190                )
     191
     192            return 'DISTINCT ON (%s)' % ', '.join(fields_sql)
     193        else:
     194            return 'DISTINCT'
     195
    182196    def last_executed_query(self, cursor, sql, params):
    183197        # http://initd.org/psycopg/docs/cursor.html#cursor.query
    184198        # The query attribute is a Psycopg extension to the DB API 2.0.
    185199        return cursor.query
    186200
    187201    def return_insert_id(self):
    188202        return "RETURNING %s", ()
    189203
  • django/db/models/query.py

    diff --git a/django/db/models/query.py b/django/db/models/query.py
    a b  
    733733        """
    734734        assert self.query.can_filter(), \
    735735                "Cannot reorder a query once a slice has been taken."
    736736        obj = self._clone()
    737737        obj.query.clear_ordering()
    738738        obj.query.add_ordering(*field_names)
    739739        return obj
    740740
    741     def distinct(self, true_or_false=True):
     741    def distinct(self, *field_names):
    742742        """
    743743        Returns a new QuerySet instance that will select only distinct results.
    744744        """
     745        assert self.query.can_filter(), \
     746                "Cannot create distinct fields once a slice has been taken."
    745747        obj = self._clone()
    746         obj.query.distinct = true_or_false
     748        obj.query.distinct_fields = []
     749        obj.query.add_distinct_fields(*field_names)
     750        obj.query.distinct = True
     751
    747752        return obj
    748753
    749754    def extra(self, select=None, where=None, params=None, tables=None,
    750755              order_by=None, select_params=None):
    751756        """
    752757        Adds extra SQL fragments to the query.
    753758        """
    754759        assert self.query.can_filter(), \
     
    11611166        return self
    11621167
    11631168    def order_by(self, *field_names):
    11641169        """
    11651170        Always returns EmptyQuerySet.
    11661171        """
    11671172        return self
    11681173
    1169     def distinct(self, true_or_false=True):
     1174    def distinct(self, fields=None):
    11701175        """
    11711176        Always returns EmptyQuerySet.
    11721177        """
    11731178        return self
    11741179
    11751180    def extra(self, select=None, where=None, params=None, tables=None,
    11761181              order_by=None, select_params=None):
    11771182        """
  • django/db/models/sql/compiler.py

    diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py
    a b  
    5858        """
    5959        if with_limits and self.query.low_mark == self.query.high_mark:
    6060            return '', ()
    6161
    6262        self.pre_sql_setup()
    6363        out_cols = self.get_columns(with_col_aliases)
    6464        ordering, ordering_group_by = self.get_ordering()
    6565
     66        if self.query.distinct:
     67            distinct_fields = self.get_distinct()
     68
    6669        # This must come after 'select' and 'ordering' -- see docstring of
    6770        # get_from_clause() for details.
    6871        from_, f_params = self.get_from_clause()
    6972
    7073        qn = self.quote_name_unless_alias
    7174
    7275        where, w_params = self.query.where.as_sql(qn=qn, connection=self.connection)
    7376        having, h_params = self.query.having.as_sql(qn=qn, connection=self.connection)
    7477        params = []
    7578        for val in self.query.extra_select.itervalues():
    7679            params.extend(val[1])
    7780
    7881        result = ['SELECT']
     82
    7983        if self.query.distinct:
    80             result.append('DISTINCT')
     84            result.append(self.connection.ops.distinct(distinct_fields))
     85
    8186        result.append(', '.join(out_cols + self.query.ordering_aliases))
    8287
    8388        result.append('FROM')
    8489        result.extend(from_)
    8590        params.extend(f_params)
    8691
    8792        if where:
    8893            result.append('WHERE %s' % where)
     
    287292            else:
    288293                r = '%s.%s' % (qn(alias), qn2(field.column))
    289294                result.append(r)
    290295                aliases.add(r)
    291296                if with_aliases:
    292297                    col_aliases.add(field.column)
    293298        return result, aliases
    294299
     300    def get_distinct(self):
     301        result = []
     302        options = self.query.model._meta
     303
     304        for name in self.query.distinct_fields:
     305            field, source, opts, join_list, last, extra = self.query.setup_joins(
     306                name.split(LOOKUP_SEP), options, self.query.get_initial_alias(), False)
     307            result.append(field)
     308
     309            if self.query._distinct_cache:
     310                for joined_field in self.query._distinct_cache:
     311                    self.query.unref_alias(joined_field)
     312                self.query._distinct_cache = []
     313
     314            self.query._distinct_cache.extend(join_list)
     315
     316        return result
     317
     318
    295319    def get_ordering(self):
    296320        """
    297321        Returns a tuple containing a list representing the SQL elements in the
    298322        "order by" clause, and the list of SQL elements that need to be added
    299323        to the GROUP BY clause as a result of the ordering.
    300324
    301325        Also sets the ordering_aliases attribute on this instance to a list of
    302326        extra aliases needed in the select.
  • django/db/models/sql/query.py

    diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py
    a b  
    122122        self.tables = []    # Aliases in the order they are created.
    123123        self.where = where()
    124124        self.where_class = where
    125125        self.group_by = None
    126126        self.having = where()
    127127        self.order_by = []
    128128        self.low_mark, self.high_mark = 0, None  # Used for offset/limit
    129129        self.distinct = False
     130        self.distinct_fields = []
     131        self._distinct_cache = []
    130132        self.select_for_update = False
    131133        self.select_for_update_nowait = False
    132134        self.select_related = False
    133135        self.related_select_cols = []
    134136
    135137        # SQL aggregate-related attributes
    136138        self.aggregates = SortedDict() # Maps alias -> SQL aggregate function
    137139        self.aggregate_select_mask = None
     
    260262        if self.group_by is None:
    261263            obj.group_by = None
    262264        else:
    263265            obj.group_by = self.group_by[:]
    264266        obj.having = copy.deepcopy(self.having, memo=memo)
    265267        obj.order_by = self.order_by[:]
    266268        obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
    267269        obj.distinct = self.distinct
     270        obj.distinct_fields = self.distinct_fields[:]
     271        obj._distinct_cache = self._distinct_cache[:]
    268272        obj.select_for_update = self.select_for_update
    269273        obj.select_for_update_nowait = self.select_for_update_nowait
    270274        obj.select_related = self.select_related
    271275        obj.related_select_cols = []
    272276        obj.aggregates = copy.deepcopy(self.aggregates, memo=memo)
    273277        if self.aggregate_select_mask is None:
    274278            obj.aggregate_select_mask = None
    275279        else:
     
    388392            in zip(query.aggregate_select.items(), result)
    389393        ])
    390394
    391395    def get_count(self, using):
    392396        """
    393397        Performs a COUNT() query using the current filter constraints.
    394398        """
    395399        obj = self.clone()
    396         if len(self.select) > 1 or self.aggregate_select:
     400        if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields):
    397401            # If a select clause exists, then the query has already started to
    398402            # specify the columns that are to be returned.
    399403            # In this case, we need to use a subquery to evaluate the count.
    400404            from django.db.models.sql.subqueries import AggregateQuery
    401405            subquery = obj
    402406            subquery.clear_ordering(True)
    403407            subquery.clear_limits()
    404408
     
    15911595        """
    15921596        Clears the list of fields to select (but not extra_select columns).
    15931597        Some queryset types completely replace any existing list of select
    15941598        columns.
    15951599        """
    15961600        self.select = []
    15971601        self.select_fields = []
    15981602
     1603    def add_distinct_fields(self, *field_names):
     1604        """
     1605        Adds and resolves the given fields to the query's "distinct on" clause.
     1606        """
     1607        self.distinct_fields = field_names
     1608
     1609
    15991610    def add_fields(self, field_names, allow_m2m=True):
    16001611        """
    16011612        Adds the given (model) fields to the select set. The field names are
    16021613        added in the order specified.
    16031614        """
    16041615        alias = self.get_initial_alias()
    16051616        opts = self.get_meta()
    16061617
  • docs/ref/models/querysets.txt

    diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
    a b  
    340340a default ordering, or when using :meth:`order_by()`). If no such ordering is
    341341defined for a given ``QuerySet``, calling ``reverse()`` on it has no real
    342342effect (the ordering was undefined prior to calling ``reverse()``, and will
    343343remain undefined afterward).
    344344
    345345distinct
    346346~~~~~~~~
    347347
    348 .. method:: distinct()
     348.. method:: distinct([*fields])
    349349
    350350Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This
    351351eliminates duplicate rows from the query results.
    352352
    353353By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this
    354354is rarely a problem, because simple queries such as ``Blog.objects.all()``
    355355don't introduce the possibility of duplicate result rows. However, if your
    356356query spans multiple tables, it's possible to get duplicate results when a
     
    369369    selected, the columns used in any :meth:`order_by()` (or default model
    370370    ordering) will still be involved and may affect uniqueness of the results.
    371371
    372372    The moral here is that if you are using ``distinct()`` be careful about
    373373    ordering by related models. Similarly, when using ``distinct()`` and
    374374    :meth:`values()` together, be careful when ordering by fields not in the
    375375    :meth:`values()` call.
    376376
     377.. versionadded:: 1.4
     378
     379The ability to pass positional arguments (``*fields``) is new in Django 1.4.
     380These are the names of the fields to which the ``DISTINCT`` should be limited. This
     381translates to a ``SELECT DISTINCT ON`` SQL query. A ``DISTINCT ON`` query eliminates
     382duplicate rows by comparing only the given fields, rather than all fields in
     383a row.
     384
     385.. note::
     386    The ability to specify field names is only available on PostgreSQL.
     387
     388.. note::
     389    When field names are specified, the fields given to :meth:`distinct` must
     390    match the leftmost fields given to :meth:`order_by`. For example, ``SELECT
     391    DISTINCT ON (a)`` gives you the first row for each value in column ``a``. If you
     392    don't specify an order, you'll get some arbitrary row for each value.
     393
     394Examples::
     395
     396    >>> Author.objects.distinct()
     397    [...]
     398
     399    >>> Entry.objects.order_by('pub_date').distinct('pub_date')
     400    [...]
     401
     402    >>> Entry.objects.order_by('blog').distinct('blog')
     403    [...]
     404
     405    >>> Entry.objects.order_by('author', 'pub_date').distinct('author', 'pub_date')
     406    [...]
     407
     408    >>> Entry.objects.order_by('blog__name', 'mod_date').distinct('blog__name', 'mod_date')
     409    [...]
     410
     411    >>> Entry.objects.order_by('author', 'pub_date').distinct('author')
     412    [...]
     413
    377414values
    378415~~~~~~
    379416
    380417.. method:: values(*fields)
    381418
    382419Returns a ``ValuesQuerySet`` — a ``QuerySet`` subclass that returns
    383420dictionaries when used as an iterable, rather than model-instance objects.
    384421
  • tests/regressiontests/queries/models.py

    diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py
    a b  
    204204
    205205# An inter-related setup with a model subclass that has a nullable
    206206# path to another model, and a return path from that model.
    207207
    208208class Celebrity(models.Model):
    209209    name = models.CharField("Name", max_length=20)
    210210    greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
    211211
     212    def __unicode__(self):
     213        return self.name
     214
    212215class TvChef(Celebrity):
    213216    pass
    214217
    215218class Fan(models.Model):
    216219    fan_of = models.ForeignKey(Celebrity)
    217220
    218221# Multiple foreign keys
    219222class LeafA(models.Model):
     
    339342
    340343class OneToOneCategory(models.Model):
    341344    new_name = models.CharField(max_length=15)
    342345    category = models.OneToOneField(SimpleCategory)
    343346
    344347    def __unicode__(self):
    345348        return "one2one " + self.new_name
    346349
     350class Staff(models.Model):
     351    name = models.CharField(max_length=50)
     352    organisation = models.CharField(max_length=100)
     353    tags = models.ManyToManyField(Tag, through='StaffTag')
     354
     355    def __unicode__(self):
     356        return self.name
     357
     358class StaffTag(models.Model):
     359    staff = models.ForeignKey(Staff)
     360    tag = models.ForeignKey(Tag)
     361
     362    def __unicode__(self):
     363        return u"%s -> %s" % (self.tag, self.staff)
  • tests/regressiontests/queries/tests.py

    diff --git a/tests/regressiontests/queries/tests.py b/tests/regressiontests/queries/tests.py
    a b  
    1313from django.utils import unittest
    1414from django.utils.datastructures import SortedDict
    1515
    1616from .models import (Annotation, Article, Author, Celebrity, Child, Cover,
    1717    Detail, DumbCategory, ExtraInfo, Fan, Item, LeafA, LoopX, LoopZ,
    1818    ManagedModel, Member, NamedCategory, Note, Number, Plaything, PointerA,
    1919    Ranking, Related, Report, ReservedName, Tag, TvChef, Valid, X, Food, Eaten,
    2020    Node, ObjectA, ObjectB, ObjectC, CategoryItem, SimpleCategory,
    21     SpecialCategory, OneToOneCategory)
     21    SpecialCategory, OneToOneCategory, Staff, StaffTag)
    2222
    2323
    2424class BaseQuerysetTest(TestCase):
    2525    def assertValueQuerysetEqual(self, qs, values):
    2626        return self.assertQuerysetEqual(qs, values, transform=lambda x: x)
    2727
    2828
    2929class Queries1Tests(BaseQuerysetTest):
     
    17341734    def setUp(self):
    17351735        generic = NamedCategory.objects.create(name="Generic")
    17361736        t1 = Tag.objects.create(name='t1', category=generic)
    17371737        t2 = Tag.objects.create(name='t2', parent=t1, category=generic)
    17381738        t3 = Tag.objects.create(name='t3', parent=t1)
    17391739        t4 = Tag.objects.create(name='t4', parent=t3)
    17401740        t5 = Tag.objects.create(name='t5', parent=t3)
    17411741
     1742        p1_o1 = Staff.objects.create(name="p1", organisation="o1")
     1743        p2_o1 = Staff.objects.create(name="p2", organisation="o1")
     1744        p3_o1 = Staff.objects.create(name="p3", organisation="o1")
     1745        p1_o2 = Staff.objects.create(name="p1", organisation="o2")
     1746
     1747        StaffTag.objects.create(staff=p1_o1, tag=t1)
     1748        StaffTag.objects.create(staff=p1_o1, tag=t1)
     1749
     1750        celeb1 = Celebrity.objects.create(name="c1")
     1751        celeb2 = Celebrity.objects.create(name="c2")
     1752
     1753        self.fan1 = Fan.objects.create(fan_of=celeb1)
     1754        self.fan2 = Fan.objects.create(fan_of=celeb1)
     1755        self.fan3 = Fan.objects.create(fan_of=celeb2)
     1756
    17421757    # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed
    17431758    # (Python issue 1242657), so these cases return an empty list, rather than
    17441759    # raising an exception. Not a lot we can do about that, unfortunately, due to
    17451760    # the way Python handles list() calls internally. Thus, we skip the tests for
    17461761    # Python 2.6.
    17471762    @unittest.skipIf(sys.version_info[:2] == (2, 6), "Python version is 2.6")
    17481763    def test_infinite_loop(self):
    17491764        # If you're not careful, it's possible to introduce infinite loops via
     
    18051820            Number.objects.filter(num__in=numbers[:2000]).count(),
    18061821            2000
    18071822        )
    18081823        self.assertEqual(
    18091824            Number.objects.filter(num__in=numbers).count(),
    18101825            2500
    18111826        )
    18121827
     1828    @skipUnlessDBFeature('can_distinct_on_fields')
     1829    def test_ticket6422(self):
     1830        """QuerySet.distinct('field', ...) works"""
     1831        # (qset, expected) tuples
     1832        qsets = (
     1833            (
     1834                Staff.objects.distinct().order_by('name'),
     1835                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     1836            ),
     1837            (
     1838                Staff.objects.distinct('name').order_by('name'),
     1839                ['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     1840            ),
     1841            (
     1842                Staff.objects.distinct('organisation').order_by('organisation', 'name'),
     1843                ['<Staff: p1>', '<Staff: p1>'],
     1844            ),
     1845            (
     1846                Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'),
     1847                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     1848            ),
     1849            (
     1850                Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\
     1851                    distinct('name').order_by('name'),
     1852                ['<Celebrity: c1>', '<Celebrity: c2>'],
     1853            ),
     1854            (
     1855                StaffTag.objects.distinct('staff','tag'),
     1856                ['<StaffTag: t1 -> p1>'],
     1857            ),
     1858            (
     1859                Tag.objects.order_by('parent__pk', 'pk').distinct('parent'),
     1860                ['<Tag: t2>', '<Tag: t4>', '<Tag: t1>'],
     1861            ),
     1862            (
     1863                StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'),
     1864                ['<StaffTag: t1 -> p1>'],
     1865            ),
     1866        )
     1867
     1868        for qset, expected in qsets:
     1869            self.assertQuerysetEqual(qset, expected)
     1870            self.assertEqual(qset.count(), len(expected))
     1871
     1872        # Test join unreffing
     1873        c1 = Celebrity.objects.distinct('greatest_fan__id', 'greatest_fan__fan_of')
     1874        self.assertIn('INNER JOIN', str(c1.query))
     1875
     1876        c2 = c1.distinct('pk')
     1877        self.assertNotIn('INNER JOIN', str(c2.query))
     1878
     1879
    18131880class UnionTests(unittest.TestCase):
    18141881    """
    18151882    Tests for the union of two querysets. Bug #12252.
    18161883    """
    18171884    def setUp(self):
    18181885        objectas = []
    18191886        objectbs = []
    18201887        objectcs = []
Back to Top