Ticket #6422: distinct_on.6.diff

File distinct_on.6.diff, 18.5 KB (added by Jeffrey Gelens, 13 years ago)

Fixed field traversal

  • AUTHORS

    diff --git a/AUTHORS b/AUTHORS
    a b  
    544544    ye7cakf02@sneakemail.com
    545545    ymasuda@ethercube.com
    546546    Jesse Young <adunar@gmail.com>
    547547    Mykola Zamkovoi <nickzam@gmail.com>
    548548    zegor
    549549    Gasper Zejn <zejn@kiberpipa.org>
    550550    Jarek Zgoda <jarek.zgoda@gmail.com>
    551551    Cheng Zhang
     552    Jeffrey Gelens <jeffrey@gelens.org>
    552553
    553554A big THANK YOU goes to:
    554555
    555556    Rob Curley and Ralph Gage for letting us open-source Django.
    556557
    557558    Frank Wiles for making excellent arguments for open-sourcing, and for
    558559    his sage sysadmin advice.
    559560
  • django/db/backends/__init__.py

    diff --git a/django/db/backends/__init__.py b/django/db/backends/__init__.py
    a b  
    368368
    369369    # Features that need to be confirmed at runtime
    370370    # Cache whether the confirmation has been performed.
    371371    _confirmed = False
    372372    supports_transactions = None
    373373    supports_stddev = None
    374374    can_introspect_foreign_keys = None
    375375
     376    # Support for the DISTINCT ON clause
     377    can_distinct_on_fields = False
     378
    376379    def __init__(self, connection):
    377380        self.connection = connection
    378381
    379382    def confirm(self):
    380383        "Perform manual checks of any database features that might vary between installs"
    381384        self._confirmed = True
    382385        self.supports_transactions = self._supports_transactions()
    383386        self.supports_stddev = self._supports_stddev()
     
    521524    def fulltext_search_sql(self, field_name):
    522525        """
    523526        Returns the SQL WHERE clause to use in order to perform a full-text
    524527        search of the given field_name. Note that the resulting string should
    525528        contain a '%s' placeholder for the value being searched against.
    526529        """
    527530        raise NotImplementedError('Full-text search is not implemented for this database backend')
    528531
     532    def distinct(self, db_table, fields):
     533        """
     534        Returns an SQL DISTINCT clause which removes duplicate rows from the
     535        result set. If any fields are given, only those fields are checked
     536        for duplicates.
     537        """
     538        if fields:
     539            raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
     540        else:
     541            return 'DISTINCT'
     542
    529543    def last_executed_query(self, cursor, sql, params):
    530544        """
    531545        Returns a string of the query last executed by the given cursor, with
    532546        placeholders replaced with actual values.
    533547
    534548        `sql` is the raw query containing placeholders, and `params` is the
    535549        sequence of parameters. These are used by default, but this method
    536550        exists for database backends to provide a better implementation
  • django/db/backends/postgresql_psycopg2/base.py

    diff --git a/django/db/backends/postgresql_psycopg2/base.py b/django/db/backends/postgresql_psycopg2/base.py
    a b  
    7070    needs_datetime_string_cast = False
    7171    can_return_id_from_insert = True
    7272    requires_rollback_on_dirty_transaction = True
    7373    has_real_datatype = True
    7474    can_defer_constraint_checks = True
    7575    has_select_for_update = True
    7676    has_select_for_update_nowait = True
    7777    has_bulk_insert = True
     78    can_distinct_on_fields = True
    7879
    7980
    8081class DatabaseWrapper(BaseDatabaseWrapper):
    8182    vendor = 'postgresql'
    8283    operators = {
    8384        'exact': '= %s',
    8485        'iexact': '= UPPER(%s)',
    8586        'contains': 'LIKE %s',
  • django/db/backends/postgresql_psycopg2/operations.py

    diff --git a/django/db/backends/postgresql_psycopg2/operations.py b/django/db/backends/postgresql_psycopg2/operations.py
    a b  
    168168        macro in src/include/pg_config_manual.h .
    169169
    170170        This implementation simply returns 63, but can easily be overridden by a
    171171        custom database backend that inherits most of its behavior from this one.
    172172        """
    173173
    174174        return 63
    175175
     176    def distinct(self, fields):
     177        if fields:
     178            fields_sql = []
     179
     180            for field in fields:
     181                fields_sql.append(
     182                    self.quote_name(field.model._meta.db_table) + "." + \
     183                    self.quote_name(field.column)
     184                )
     185
     186            return 'DISTINCT ON (%s)' % ', '.join(fields_sql)
     187        else:
     188            return 'DISTINCT'
     189
    176190    def last_executed_query(self, cursor, sql, params):
    177191        # http://initd.org/psycopg/docs/cursor.html#cursor.query
    178192        # The query attribute is a Psycopg extension to the DB API 2.0.
    179193        return cursor.query
    180194
    181195    def return_insert_id(self):
    182196        return "RETURNING %s", ()
    183197
  • django/db/models/query.py

    diff --git a/django/db/models/query.py b/django/db/models/query.py
    a b  
    693693        """
    694694        assert self.query.can_filter(), \
    695695                "Cannot reorder a query once a slice has been taken."
    696696        obj = self._clone()
    697697        obj.query.clear_ordering()
    698698        obj.query.add_ordering(*field_names)
    699699        return obj
    700700
    701     def distinct(self, true_or_false=True):
     701    def distinct(self, *field_names):
    702702        """
    703703        Returns a new QuerySet instance that will select only distinct results.
    704704        """
    705705        obj = self._clone()
    706         obj.query.distinct = true_or_false
     706        obj.query.add_distinct_fields(field_names)
     707        obj.query.distinct = True
     708
    707709        return obj
    708710
    709711    def extra(self, select=None, where=None, params=None, tables=None,
    710712              order_by=None, select_params=None):
    711713        """
    712714        Adds extra SQL fragments to the query.
    713715        """
    714716        assert self.query.can_filter(), \
     
    11181120        return self
    11191121
    11201122    def order_by(self, *field_names):
    11211123        """
    11221124        Always returns EmptyQuerySet.
    11231125        """
    11241126        return self
    11251127
    1126     def distinct(self, true_or_false=True):
     1128    def distinct(self, fields=None):
    11271129        """
    11281130        Always returns EmptyQuerySet.
    11291131        """
    11301132        return self
    11311133
    11321134    def extra(self, select=None, where=None, params=None, tables=None,
    11331135              order_by=None, select_params=None):
    11341136        """
  • django/db/models/sql/compiler.py

    diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py
    a b  
    7171
    7272        where, w_params = self.query.where.as_sql(qn=qn, connection=self.connection)
    7373        having, h_params = self.query.having.as_sql(qn=qn, connection=self.connection)
    7474        params = []
    7575        for val in self.query.extra_select.itervalues():
    7676            params.extend(val[1])
    7777
    7878        result = ['SELECT']
     79
    7980        if self.query.distinct:
    80             result.append('DISTINCT')
     81            result.append(self.connection.ops.distinct(self.query.distinct_fields))
     82
    8183        result.append(', '.join(out_cols + self.query.ordering_aliases))
    8284
    8385        result.append('FROM')
    8486        result.extend(from_)
    8587        params.extend(f_params)
    8688
    8789        if where:
    8890            result.append('WHERE %s' % where)
  • django/db/models/sql/query.py

    diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py
    a b  
    121121        self.tables = []    # Aliases in the order they are created.
    122122        self.where = where()
    123123        self.where_class = where
    124124        self.group_by = None
    125125        self.having = where()
    126126        self.order_by = []
    127127        self.low_mark, self.high_mark = 0, None  # Used for offset/limit
    128128        self.distinct = False
     129        self.distinct_fields = None
    129130        self.select_for_update = False
    130131        self.select_for_update_nowait = False
    131132        self.select_related = False
    132133        self.related_select_cols = []
    133134
    134135        # SQL aggregate-related attributes
    135136        self.aggregates = SortedDict() # Maps alias -> SQL aggregate function
    136137        self.aggregate_select_mask = None
     
    259260        if self.group_by is None:
    260261            obj.group_by = None
    261262        else:
    262263            obj.group_by = self.group_by[:]
    263264        obj.having = copy.deepcopy(self.having, memo=memo)
    264265        obj.order_by = self.order_by[:]
    265266        obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
    266267        obj.distinct = self.distinct
     268        obj.distinct_fields = self.distinct_fields
    267269        obj.select_for_update = self.select_for_update
    268270        obj.select_for_update_nowait = self.select_for_update_nowait
    269271        obj.select_related = self.select_related
    270272        obj.related_select_cols = []
    271273        obj.aggregates = copy.deepcopy(self.aggregates, memo=memo)
    272274        if self.aggregate_select_mask is None:
    273275            obj.aggregate_select_mask = None
    274276        else:
     
    387389            in zip(query.aggregate_select.items(), result)
    388390        ])
    389391
    390392    def get_count(self, using):
    391393        """
    392394        Performs a COUNT() query using the current filter constraints.
    393395        """
    394396        obj = self.clone()
    395         if len(self.select) > 1 or self.aggregate_select:
     397        if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields):
    396398            # If a select clause exists, then the query has already started to
    397399            # specify the columns that are to be returned.
    398400            # In this case, we need to use a subquery to evaluate the count.
    399401            from django.db.models.sql.subqueries import AggregateQuery
    400402            subquery = obj
    401403            subquery.clear_ordering(True)
    402404            subquery.clear_limits()
    403405
     
    15901592        """
    15911593        Clears the list of fields to select (but not extra_select columns).
    15921594        Some queryset types completely replace any existing list of select
    15931595        columns.
    15941596        """
    15951597        self.select = []
    15961598        self.select_fields = []
    15971599
     1600    def add_distinct_fields(self, field_names):
     1601        self.distinct_fields = []
     1602        options = self.get_meta()
     1603
     1604        for name in field_names:
     1605            field, source, opts, join_list, last, _ = self.setup_joins(
     1606                name.split(LOOKUP_SEP), options, self.get_initial_alias(), False)
     1607            self.distinct_fields.append(field)
     1608
    15981609    def add_fields(self, field_names, allow_m2m=True):
    15991610        """
    16001611        Adds the given (model) fields to the select set. The field names are
    16011612        added in the order specified.
    16021613        """
    16031614        alias = self.get_initial_alias()
    16041615        opts = self.get_meta()
    16051616
  • docs/ref/models/querysets.txt

    diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
    a b  
    340340a default ordering, or when using :meth:`order_by()`). If no such ordering is
    341341defined for a given ``QuerySet``, calling ``reverse()`` on it has no real
    342342effect (the ordering was undefined prior to calling ``reverse()``, and will
    343343remain undefined afterward).
    344344
    345345distinct
    346346~~~~~~~~
    347347
    348 .. method:: distinct()
     348.. method:: distinct(*fields)
    349349
    350350Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This
    351351eliminates duplicate rows from the query results.
    352352
    353353By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this
    354354is rarely a problem, because simple queries such as ``Blog.objects.all()``
    355355don't introduce the possibility of duplicate result rows. However, if your
    356356query spans multiple tables, it's possible to get duplicate results when a
     
    369369    selected, the columns used in any :meth:`order_by()` (or default model
    370370    ordering) will still be involved and may affect uniqueness of the results.
    371371
    372372    The moral here is that if you are using ``distinct()`` be careful about
    373373    ordering by related models. Similarly, when using ``distinct()`` and
    374374    :meth:`values()` together, be careful when ordering by fields not in the
    375375    :meth:`values()` call.
    376376
     377.. versionadded:: 1.4
     378   ``distinct()`` takes optional positional arguments ``*fields``, naming the
     379   fields that the ``DISTINCT`` should be limited to. This translates to
     380   a ``SELECT DISTINCT ON`` SQL query. Note that ``DISTINCT ON`` is
     381   only available in PostgreSQL.
     382
     383.. note::
     384    When optional ``*fields`` are given, you must also add an
     385    :meth:`order_by()` call whose leftmost arguments are those same field names.
     386
    377387values
    378388~~~~~~
    379389
    380390.. method:: values(*fields)
    381391
    382392Returns a ``ValuesQuerySet`` — a ``QuerySet`` subclass that returns
    383393dictionaries when used as an iterable, rather than model-instance objects.
    384394
  • tests/regressiontests/queries/models.py

    diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py
    a b  
    203203
    204204# An inter-related setup with a model subclass that has a nullable
    205205# path to another model, and a return path from that model.
    206206
    207207class Celebrity(models.Model):
    208208    name = models.CharField("Name", max_length=20)
    209209    greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
    210210
     211    def __unicode__(self):
     212        return self.name
     213
    211214class TvChef(Celebrity):
    212215    pass
    213216
    214217class Fan(models.Model):
    215218    fan_of = models.ForeignKey(Celebrity)
    216219
    217220# Multiple foreign keys
    218221class LeafA(models.Model):
     
    337340            return "category item: " + str(self.category)
    338341
    339342class OneToOneCategory(models.Model):
    340343    new_name = models.CharField(max_length=15)
    341344    category = models.OneToOneField(SimpleCategory)
    342345
    343346    def __unicode__(self):
    344347        return "one2one " + self.new_name
    345    
     348
     349class Staff(models.Model):
     350    name = models.CharField(max_length=50)
     351    organisation = models.CharField(max_length=100)
     352    tags = models.ManyToManyField(Tag, through='StaffTag')
     353
     354    def __unicode__(self):
     355        return self.name
     356
     357class StaffTag(models.Model):
     358    staff = models.ForeignKey(Staff)
     359    tag = models.ForeignKey(Tag)
     360
     361    def __unicode__(self):
     362        return u"%s -> %s" % (self.tag, self.staff)
  • tests/regressiontests/queries/tests.py

    diff --git a/tests/regressiontests/queries/tests.py b/tests/regressiontests/queries/tests.py
    a b  
    1010from django.test import TestCase, skipUnlessDBFeature
    1111from django.utils import unittest
    1212from django.utils.datastructures import SortedDict
    1313
    1414from models import (Annotation, Article, Author, Celebrity, Child, Cover, Detail,
    1515    DumbCategory, ExtraInfo, Fan, Item, LeafA, LoopX, LoopZ, ManagedModel,
    1616    Member, NamedCategory, Note, Number, Plaything, PointerA, Ranking, Related,
    1717    Report, ReservedName, Tag, TvChef, Valid, X, Food, Eaten, Node, ObjectA, ObjectB,
    18     ObjectC, CategoryItem, SimpleCategory, SpecialCategory, OneToOneCategory)
     18    ObjectC, CategoryItem, SimpleCategory, SpecialCategory, OneToOneCategory,
     19    Staff, StaffTag)
    1920
    2021
    2122class BaseQuerysetTest(TestCase):
    2223    def assertValueQuerysetEqual(self, qs, values):
    2324        return self.assertQuerysetEqual(qs, values, transform=lambda x: x)
    2425
    2526
    2627class Queries1Tests(BaseQuerysetTest):
     
    17311732    def setUp(self):
    17321733        generic = NamedCategory.objects.create(name="Generic")
    17331734        t1 = Tag.objects.create(name='t1', category=generic)
    17341735        t2 = Tag.objects.create(name='t2', parent=t1, category=generic)
    17351736        t3 = Tag.objects.create(name='t3', parent=t1)
    17361737        t4 = Tag.objects.create(name='t4', parent=t3)
    17371738        t5 = Tag.objects.create(name='t5', parent=t3)
    17381739
     1740        p1_o1 = Staff.objects.create(name="p1", organisation="o1")
     1741        p2_o1 = Staff.objects.create(name="p2", organisation="o1")
     1742        p3_o1 = Staff.objects.create(name="p3", organisation="o1")
     1743        p1_o2 = Staff.objects.create(name="p1", organisation="o2")
     1744
     1745        StaffTag.objects.create(staff=p1_o1, tag=t1)
     1746        StaffTag.objects.create(staff=p1_o1, tag=t1)
     1747
     1748        celeb1 = Celebrity.objects.create(name="c1")
     1749        celeb2 = Celebrity.objects.create(name="c2")
     1750
     1751        self.fan1 = Fan.objects.create(fan_of=celeb1)
     1752        self.fan2 = Fan.objects.create(fan_of=celeb1)
     1753        self.fan3 = Fan.objects.create(fan_of=celeb2)
     1754
    17391755    # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed
    17401756    # (Python issue 1242657), so these cases return an empty list, rather than
    17411757    # raising an exception. Not a lot we can do about that, unfortunately, due to
    17421758    # the way Python handles list() calls internally. Thus, we skip the tests for
    17431759    # Python 2.6.
    17441760    @unittest.skipIf(sys.version_info[:2] == (2, 6), "Python version is 2.6")
    17451761    def test_infinite_loop(self):
    17461762        # If you're not careful, it's possible to introduce infinite loops via
     
    18021818            Number.objects.filter(num__in=numbers[:2000]).count(),
    18031819            2000
    18041820        )
    18051821        self.assertEqual(
    18061822            Number.objects.filter(num__in=numbers).count(),
    18071823            2500
    18081824        )
    18091825
     1826    @skipUnlessDBFeature('can_distinct_on_fields')
     1827    def test_ticket6422(self):
     1828        # (qset, expected) tuples
     1829        qsets = (
     1830            (
     1831                Staff.objects.distinct().order_by('name'),
     1832                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     1833            ),
     1834            (
     1835                Staff.objects.distinct('name').order_by('name'),
     1836                ['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     1837            ),
     1838            (
     1839                Staff.objects.distinct('organisation').order_by('organisation', 'name'),
     1840                ['<Staff: p1>', '<Staff: p1>'],
     1841            ),
     1842            (
     1843                Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'),
     1844                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     1845            ),
     1846            (
     1847                Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\
     1848                    distinct('name').order_by('name'),
     1849                ['<Celebrity: c1>', '<Celebrity: c2>'],
     1850            ),
     1851            (
     1852                StaffTag.objects.distinct('staff','tag'),
     1853                ['<StaffTag: t1 -> p1>'],
     1854            ),
     1855            (
     1856                Tag.objects.order_by('parent__pk').distinct('parent'),
     1857                ['<Tag: t3>', '<Tag: t5>', '<Tag: t1>'],
     1858            ),
     1859            (
     1860                StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'),
     1861                ['<StaffTag: t1 -> p1>'],
     1862            ),
     1863        )
     1864
     1865        for qset, expected in qsets:
     1866            self.assertQuerysetEqual(qset, expected)
     1867            self.assertEqual(qset.count(), len(expected))
     1868
     1869        # Also check that an unknown field name raises FieldError
     1870        self.assertRaises(
     1871            FieldError,
     1872            lambda: Staff.objects.distinct('shrubbery')
     1873        )
     1874
     1875
    18101876class UnionTests(unittest.TestCase):
    18111877    """
    18121878    Tests for the union of two querysets. Bug #12252.
    18131879    """
    18141880    def setUp(self):
    18151881        objectas = []
    18161882        objectbs = []
    18171883        objectcs = []
Back to Top