Ticket #6422: distinct_on.diff

File distinct_on.diff, 12.8 KB (added by Jeffrey Gelens, 13 years ago)
  • AUTHORS

    diff --git a/AUTHORS b/AUTHORS
    a b  
    535535    ye7cakf02@sneakemail.com
    536536    ymasuda@ethercube.com
    537537    Jesse Young <adunar@gmail.com>
    538538    Mykola Zamkovoi <nickzam@gmail.com>
    539539    zegor
    540540    Gasper Zejn <zejn@kiberpipa.org>
    541541    Jarek Zgoda <jarek.zgoda@gmail.com>
    542542    Cheng Zhang
     543    Jeffrey Gelens <jeffrey@gelens.org>
    543544
    544545A big THANK YOU goes to:
    545546
    546547    Rob Curley and Ralph Gage for letting us open-source Django.
    547548
    548549    Frank Wiles for making excellent arguments for open-sourcing, and for
    549550    his sage sysadmin advice.
    550551
  • django/db/backends/__init__.py

    diff --git a/django/db/backends/__init__.py b/django/db/backends/__init__.py
    a b  
    336336
    337337    # Features that need to be confirmed at runtime
    338338    # Cache whether the confirmation has been performed.
    339339    _confirmed = False
    340340    supports_transactions = None
    341341    supports_stddev = None
    342342    can_introspect_foreign_keys = None
    343343
     344    # Support for the DISTINCT ON clause
     345    can_distinct_on_fields = False
     346
    344347    def __init__(self, connection):
    345348        self.connection = connection
    346349
    347350    def confirm(self):
    348351        "Perform manual checks of any database features that might vary between installs"
    349352        self._confirmed = True
    350353        self.supports_transactions = self._supports_transactions()
    351354        self.supports_stddev = self._supports_stddev()
     
    489492    def fulltext_search_sql(self, field_name):
    490493        """
    491494        Returns the SQL WHERE clause to use in order to perform a full-text
    492495        search of the given field_name. Note that the resulting string should
    493496        contain a '%s' placeholder for the value being searched against.
    494497        """
    495498        raise NotImplementedError('Full-text search is not implemented for this database backend')
    496499
     500    def distinct(self, db_table, fields):
     501        """
     502        Returns an SQL DISTINCT clause which removes duplicate rows from the
     503        result set. If any fields are given, only those fields are compared
     504        when checking for duplicates.
     505        """
     506        if fields:
     507            raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
     508        else:
     509            return 'DISTINCT'
     510
    497511    def last_executed_query(self, cursor, sql, params):
    498512        """
    499513        Returns a string of the query last executed by the given cursor, with
    500514        placeholders replaced with actual values.
    501515
    502516        `sql` is the raw query containing placeholders, and `params` is the
    503517        sequence of parameters. These are used by default, but this method
    504518        exists for database backends to provide a better implementation
  • django/db/backends/postgresql_psycopg2/base.py

    diff --git a/django/db/backends/postgresql_psycopg2/base.py b/django/db/backends/postgresql_psycopg2/base.py
    a b  
    6666class DatabaseFeatures(BaseDatabaseFeatures):
    6767    needs_datetime_string_cast = False
    6868    can_return_id_from_insert = True
    6969    requires_rollback_on_dirty_transaction = True
    7070    has_real_datatype = True
    7171    can_defer_constraint_checks = True
    7272    has_select_for_update = True
    7373    has_select_for_update_nowait = True
     74    can_distinct_on_fields = True
    7475
    7576
    7677class DatabaseWrapper(BaseDatabaseWrapper):
    7778    vendor = 'postgresql'
    7879    operators = {
    7980        'exact': '= %s',
    8081        'iexact': '= UPPER(%s)',
    8182        'contains': 'LIKE %s',
  • django/db/backends/postgresql_psycopg2/operations.py

    diff --git a/django/db/backends/postgresql_psycopg2/operations.py b/django/db/backends/postgresql_psycopg2/operations.py
    a b  
    168168        macro in src/include/pg_config_manual.h .
    169169
    170170        This implementation simply returns 63, but can easily be overridden by a
    171171        custom database backend that inherits most of its behavior from this one.
    172172        """
    173173
    174174        return 63
    175175
     176    def distinct(self, db_table, fields):
     177        if fields:
     178            table_name = self.quote_name(db_table)
     179            fields = [table_name + "." + self.quote_name(field) for field in fields]
     180            return 'DISTINCT ON (%s)' % ', '.join(fields)
     181        else:
     182            return 'DISTINCT'
     183
    176184    def last_executed_query(self, cursor, sql, params):
    177185        # http://initd.org/psycopg/docs/cursor.html#cursor.query
    178186        # The query attribute is a Psycopg extension to the DB API 2.0.
    179187        return cursor.query
    180188
    181189    def return_insert_id(self):
    182190        return "RETURNING %s", ()
  • django/db/models/query.py

    diff --git a/django/db/models/query.py b/django/db/models/query.py
    a b  
    660660        """
    661661        assert self.query.can_filter(), \
    662662                "Cannot reorder a query once a slice has been taken."
    663663        obj = self._clone()
    664664        obj.query.clear_ordering()
    665665        obj.query.add_ordering(*field_names)
    666666        return obj
    667667
    668     def distinct(self, true_or_false=True):
     668    def distinct(self, *field_names):
    669669        """
    670670        Returns a new QuerySet instance that will select only distinct results.
    671671        """
    672672        obj = self._clone()
    673         obj.query.distinct = true_or_false
     673        obj.query.add_distinct_fields(field_names)
     674        obj.query.distinct = True
     675
    674676        return obj
    675677
    676678    def extra(self, select=None, where=None, params=None, tables=None,
    677679              order_by=None, select_params=None):
    678680        """
    679681        Adds extra SQL fragments to the query.
    680682        """
    681683        assert self.query.can_filter(), \
     
    10851087        return self
    10861088
    10871089    def order_by(self, *field_names):
    10881090        """
    10891091        Always returns EmptyQuerySet.
    10901092        """
    10911093        return self
    10921094
    1093     def distinct(self, true_or_false=True):
     1095    def distinct(self, fields=None):
    10941096        """
    10951097        Always returns EmptyQuerySet.
    10961098        """
    10971099        return self
    10981100
    10991101    def extra(self, select=None, where=None, params=None, tables=None,
    11001102              order_by=None, select_params=None):
    11011103        """
  • django/db/models/sql/compiler.py

    diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py
    a b  
    6969
    7070        where, w_params = self.query.where.as_sql(qn=qn, connection=self.connection)
    7171        having, h_params = self.query.having.as_sql(qn=qn, connection=self.connection)
    7272        params = []
    7373        for val in self.query.extra_select.itervalues():
    7474            params.extend(val[1])
    7575
    7676        result = ['SELECT']
     77
    7778        if self.query.distinct:
    78             result.append('DISTINCT')
     79            distinct_sql = self.connection.ops.distinct(
     80                self.query.model._meta.db_table, self.query.distinct_fields)
     81            result.append(distinct_sql)
     82
    7983        result.append(', '.join(out_cols + self.query.ordering_aliases))
    8084
    8185        result.append('FROM')
    8286        result.extend(from_)
    8387        params.extend(f_params)
    8488
    8589        if where:
    8690            result.append('WHERE %s' % where)
  • django/db/models/sql/query.py

    diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py
    a b  
    120120        self.tables = []    # Aliases in the order they are created.
    121121        self.where = where()
    122122        self.where_class = where
    123123        self.group_by = None
    124124        self.having = where()
    125125        self.order_by = []
    126126        self.low_mark, self.high_mark = 0, None  # Used for offset/limit
    127127        self.distinct = False
     128        self.distinct_fields = None
    128129        self.select_for_update = False
    129130        self.select_for_update_nowait = False
    130131        self.select_related = False
    131132        self.related_select_cols = []
    132133
    133134        # SQL aggregate-related attributes
    134135        self.aggregates = SortedDict() # Maps alias -> SQL aggregate function
    135136        self.aggregate_select_mask = None
     
    251252        if self.group_by is None:
    252253            obj.group_by = None
    253254        else:
    254255            obj.group_by = self.group_by[:]
    255256        obj.having = copy.deepcopy(self.having, memo=memo)
    256257        obj.order_by = self.order_by[:]
    257258        obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
    258259        obj.distinct = self.distinct
     260        obj.distinct_fields = self.distinct_fields
    259261        obj.select_for_update = self.select_for_update
    260262        obj.select_for_update_nowait = self.select_for_update_nowait
    261263        obj.select_related = self.select_related
    262264        obj.related_select_cols = []
    263265        obj.aggregates = copy.deepcopy(self.aggregates, memo=memo)
    264266        if self.aggregate_select_mask is None:
    265267            obj.aggregate_select_mask = None
    266268        else:
     
    379381            in zip(query.aggregate_select.items(), result)
    380382        ])
    381383
    382384    def get_count(self, using):
    383385        """
    384386        Performs a COUNT() query using the current filter constraints.
    385387        """
    386388        obj = self.clone()
    387         if len(self.select) > 1 or self.aggregate_select:
     389        if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields):
    388390            # If a select clause exists, then the query has already started to
    389391            # specify the columns that are to be returned.
    390392            # In this case, we need to use a subquery to evaluate the count.
    391393            from django.db.models.sql.subqueries import AggregateQuery
    392394            subquery = obj
    393395            subquery.clear_ordering(True)
    394396            subquery.clear_limits()
    395397
     
    15521554        """
    15531555        Clears the list of fields to select (but not extra_select columns).
    15541556        Some queryset types completely replace any existing list of select
    15551557        columns.
    15561558        """
    15571559        self.select = []
    15581560        self.select_fields = []
    15591561
     1562    def add_distinct_fields(self, field_names):
     1563        self.distinct_fields = []
     1564        options = self.get_meta()
     1565
     1566        for name in field_names:
     1567            field, source, opts, join_list, last, _ = self.setup_joins(
     1568                name.split(LOOKUP_SEP), options, self.get_initial_alias(), False)
     1569            self.distinct_fields.append(field.column)
     1570
    15601571    def add_fields(self, field_names, allow_m2m=True):
    15611572        """
    15621573        Adds the given (model) fields to the select set. The field names are
    15631574        added in the order specified.
    15641575        """
    15651576        alias = self.get_initial_alias()
    15661577        opts = self.get_meta()
    15671578
  • docs/ref/models/querysets.txt

    diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
    a b  
    134134    introspection:
    135135
    136136    .. attribute:: ordered
    137137
    138138        ``True`` if the ``QuerySet`` is ordered -- i.e. has an order_by()
    139139        clause or a default ordering on the model. ``False`` otherwise.
    140140
    141141    .. attribute:: db
    142    
     142
    143143        The database that will be used if this query is executed now.
    144144
    145145    .. note::
    146146
    147147        The ``query`` parameter to :class:`QuerySet` exists so that specialized
    148148        query subclasses such as
    149149        :class:`~django.contrib.gis.db.models.GeoQuerySet` can reconstruct
    150150        internal query state. The value of the parameter is an opaque
     
    340340``order_by()``). If no such ordering is defined for a given
    341341``QuerySet``, calling ``reverse()`` on it has no real effect (the
    342342ordering was undefined prior to calling ``reverse()``, and will remain
    343343undefined afterward).
    344344
    345345distinct
    346346~~~~~~~~
    347347
    348 .. method:: distinct()
     348.. method:: distinct(*fields)
    349349
    350350Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This
    351351eliminates duplicate rows from the query results.
    352352
    353353By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this
    354354is rarely a problem, because simple queries such as ``Blog.objects.all()``
    355355don't introduce the possibility of duplicate result rows. However, if your
    356356query spans multiple tables, it's possible to get duplicate results when a
    357357``QuerySet`` is evaluated. That's when you'd use ``distinct()``.
    358358
     359.. versionadded:: 1.4
     360   ``distinct()`` accepts optional positional arguments ``*fields`` naming the
     361   fields to which ``DISTINCT`` should be limited. This translates to a
     362   ``SELECT DISTINCT ON`` SQL query, which is only available on PostgreSQL;
     363   other backends raise ``NotImplementedError`` when fields are given.
     364
     365.. note::
     366    When ``*fields`` are given, you must also call :meth:`order_by` with the
     367    same field names, in the same order, as its leftmost arguments.
     368
    359369.. note::
    360370    Any fields used in an :meth:`order_by` call are included in the SQL
    361371    ``SELECT`` columns. This can sometimes lead to unexpected results when
    362372    used in conjunction with ``distinct()``. If you order by fields from a
    363373    related model, those fields will be added to the selected columns and they
    364374    may make otherwise duplicate rows appear to be distinct. Since the extra
    365375    columns don't appear in the returned results (they are only there to
    366376    support ordering), it sometimes looks like non-distinct results are being
Back to Top