Context Navigation

Back to Ticket #6422

Ticket #6422: distinct_on.13.diff

File distinct_on.13.diff, 42.5 KB (added by Jeffrey Gelens, 13 years ago)

AUTHORS

diff --git a/AUTHORS b/AUTHORS

-              a
     Vincent Foley <vfoleybourgon@yahoo.ca>
     Alcides Fonseca
     Rudolph Froger <rfroger@estrate.nl>
     Jorge Gajon <gajon@gajon.org>
     gandalf@owca.info
     Marc Garcia <marc.garcia@accopensys.com>
     Andy Gayton <andy-django@thecablelounge.com>
     geber@datacollect.com
+    Jeffrey Gelens <jeffrey@gelens.org>
     Baishampayan Ghose
     Joshua Ginsberg <jag@flowtheory.net>
     Dimitris Glezos <dimitris@glezos.com>
     glin@seznam.cz
     martin.glueck@gmail.com
     Artyom Gnilov <boobsd@gmail.com>
     Ben Godfrey <http://aftnn.org>
     GomoX <gomo@datafull.com>

django/db/backends/__init__.py

diff --git a/django/db/backends/__init__.py b/django/db/backends/__init__.py

-              a
     # Features that need to be confirmed at runtime
     # Cache whether the confirmation has been performed.
     _confirmed = False
     supports_transactions = None
     supports_stddev = None
     can_introspect_foreign_keys = None
+    # Support for the DISTINCT ON clause
+    can_distinct_on_fields = False
     def __init__(self, connection):
         self.connection = connection
     def confirm(self):
         "Perform manual checks of any database features that might vary between installs"
         self._confirmed = True
         self.supports_transactions = self._supports_transactions()
         self.supports_stddev = self._supports_stddev()
 …
     def fulltext_search_sql(self, field_name):
         """
         Returns the SQL WHERE clause to use in order to perform a full-text
         search of the given field_name. Note that the resulting string should
         contain a '%s' placeholder for the value being searched against.
         """
         raise NotImplementedError('Full-text search is not implemented for this database backend')
+    def distinct(self, fields):
+        """
+        Builds the SQL DISTINCT clause that drops duplicate rows from the
+        result set. Limiting the duplicate check to specific fields is not
+        available in this generic implementation, so passing any fields
+        raises NotImplementedError.
+        """
+        if not fields:
+            return 'DISTINCT'
+        raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
     def last_executed_query(self, cursor, sql, params):
         """
         Returns a string of the query last executed by the given cursor, with
         placeholders replaced with actual values.
         `sql` is the raw query containing placeholders, and `params` is the
         sequence of parameters. These are used by default, but this method
         exists for database backends to provide a better implementation

django/db/backends/postgresql_psycopg2/base.py

diff --git a/django/db/backends/postgresql_psycopg2/base.py b/django/db/backends/postgresql_psycopg2/base.py

-              a
     can_return_id_from_insert = True
     requires_rollback_on_dirty_transaction = True
     has_real_datatype = True
     can_defer_constraint_checks = True
     has_select_for_update = True
     has_select_for_update_nowait = True
     has_bulk_insert = True
     supports_tablespaces = True
+    can_distinct_on_fields = True
 class DatabaseWrapper(BaseDatabaseWrapper):
     vendor = 'postgresql'
     operators = {
         'exact': '= %s',
         'iexact': '= UPPER(%s)',
         'contains': 'LIKE %s',
         'icontains': 'LIKE UPPER(%s)',

django/db/backends/postgresql_psycopg2/operations.py

diff --git a/django/db/backends/postgresql_psycopg2/operations.py b/django/db/backends/postgresql_psycopg2/operations.py

-              a
         macro in src/include/pg_config_manual.h .
         This implementation simply returns 63, but can easily be overridden by a
         custom database backend that inherits most of its behavior from this one.
         """
         return 63
+    def distinct(self, fields):
+        """
+        Returns 'DISTINCT ON (...)' listing the given, already quoted, field
+        references, or a plain 'DISTINCT' when no fields are given.
+        """
+        if not fields:
+            return 'DISTINCT'
+        return 'DISTINCT ON (%s)' % ', '.join(fields)
     def last_executed_query(self, cursor, sql, params):
         # http://initd.org/psycopg/docs/cursor.html#cursor.query
         # The query attribute is a Psycopg extension to the DB API 2.0.
         return cursor.query
     def return_insert_id(self):
         return "RETURNING %s", ()

django/db/models/query.py

diff --git a/django/db/models/query.py b/django/db/models/query.py

-              a
     def aggregate(self, *args, **kwargs):
         """
         Returns a dictionary containing the calculations (aggregation)
         over the current queryset
         If args is present the expression is passed as a kwarg using
         the Aggregate object's default alias.
         """
+        if self.query.distinct_fields:
+            raise NotImplementedError("aggregate() + distinct(fields) not implemented.")
         for arg in args:
             kwargs[arg.default_alias] = arg
         query = self.query.clone()
         for (alias, aggregate_expr) in kwargs.items():
             query.add_aggregate(aggregate_expr, self.model, alias,
                 is_summary=True)
 …
         """
         assert self.query.can_filter(), \
                 "Cannot reorder a query once a slice has been taken."
         obj = self._clone()
         obj.query.clear_ordering()
         obj.query.add_ordering(*field_names)
         return obj
     def distinct(self, true_or_false=True):
+    def distinct(self, *field_names):
         """
         Returns a new QuerySet instance that will select only distinct results.
         """
+        assert self.query.can_filter(), \
+                "Cannot create distinct fields once a slice has been taken."
         obj = self._clone()
         obj.query.distinct = true_or_false
+        obj.query.add_distinct_fields(*field_names)
         return obj
     def extra(self, select=None, where=None, params=None, tables=None,
               order_by=None, select_params=None):
         """
         Adds extra SQL fragments to the query.
         """
         assert self.query.can_filter(), \
 …
         return self
     def order_by(self, *field_names):
         """
         Always returns EmptyQuerySet.
         """
         return self
     def distinct(self, true_or_false=True):
+    def distinct(self, fields=None):
         """
         Always returns EmptyQuerySet.
         """
         return self
     def extra(self, select=None, where=None, params=None, tables=None,
               order_by=None, select_params=None):
         """

django/db/models/sql/compiler.py

diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py

-              a
         self.using = using
         self.quote_cache = {}
     def pre_sql_setup(self):
         """
         Does any necessary class setup immediately prior to producing SQL. This
         is for things that can't necessarily be done in __init__ because we
         might not have all the pieces in place at that time.
+        # TODO: after the query has been executed, the altered state should be
+        # cleaned. We are not using a clone() of the query here.
         """
         if not self.query.tables:
             self.query.join((None, self.query.model._meta.db_table, None, None))
         if (not self.query.select and self.query.default_cols and not
                 self.query.included_inherited_models):
             self.query.setup_inherited_models()
         if self.query.select_related and not self.query.related_select_cols:
             self.fill_related_selections()
 …
         If 'with_limits' is False, any limit/offset information is not included
         in the query.
         """
         if with_limits and self.query.low_mark == self.query.high_mark:
             return '', ()
         self.pre_sql_setup()
+        # After executing the query, we must get rid of any joins the query
+        # setup created. So, take note of alias counts before the query ran.
+        # However we do not want to get rid of stuff done in pre_sql_setup(),
+        # as the pre_sql_setup will modify query state in a way that forbids
+        # another run of it.
+        self.refcounts_before = self.query.alias_refcount.copy()
         out_cols = self.get_columns(with_col_aliases)
         ordering, ordering_group_by = self.get_ordering()
+        # This must come after 'select' and 'ordering' -- see docstring of
+        # get_from_clause() for details.
+        distinct_fields = self.get_distinct()
+        # This must come after 'select', 'ordering' and 'distinct' -- see
+        # docstring of get_from_clause() for details.
         from_, f_params = self.get_from_clause()
         qn = self.quote_name_unless_alias
         where, w_params = self.query.where.as_sql(qn=qn, connection=self.connection)
         having, h_params = self.query.having.as_sql(qn=qn, connection=self.connection)
         params = []
         for val in self.query.extra_select.itervalues():
             params.extend(val[1])
         result = ['SELECT']
         if self.query.distinct:
-            result.append('DISTINCT')
+            result.append(self.connection.ops.distinct(distinct_fields))
         result.append(', '.join(out_cols + self.query.ordering_aliases))
         result.append('FROM')
         result.extend(from_)
         params.extend(f_params)
         if where:
             result.append('WHERE %s' % where)
             params.extend(w_params)
         grouping, gb_params = self.get_grouping()
         if grouping:
+            if distinct_fields:
+                raise NotImplementedError(
+                    "annotate() + distinct(fields) not implemented.")
             if ordering:
                 # If the backend can't group by PK (i.e., any database
                 # other than MySQL), then any fields mentioned in the
                 # ordering clause needs to be in the group by clause.
                 if not self.connection.features.allows_group_by_pk:
                     for col, col_params in ordering_group_by:
                         if col not in grouping:
                             grouping.append(str(col))
 …
         if self.query.select_for_update and self.connection.features.has_select_for_update:
             # If we've been asked for a NOWAIT query but the backend does not support it,
             # raise a DatabaseError otherwise we could get an unexpected deadlock.
             nowait = self.query.select_for_update_nowait
             if nowait and not self.connection.features.has_select_for_update_nowait:
                 raise DatabaseError('NOWAIT is not supported on this database backend.')
             result.append(self.connection.ops.for_update_sql(nowait=nowait))
+        # Finally do cleanup - get rid of the joins we created above.
+        self.query.reset_refcounts(self.refcounts_before)
         return ' '.join(result), tuple(params)
     def as_nested_sql(self):
         """
         Perform the same functionality as the as_sql() method, returning an
         SQL string and parameters. However, the alias prefixes are bumped
         beforehand (in a copy -- the current query isn't changed), and any
         ordering is removed if the query is unsliced.
 …
             else:
                 r = '%s.%s' % (qn(alias), qn2(field.column))
                 result.append(r)
                 aliases.add(r)
                 if with_aliases:
                     col_aliases.add(field.column)
         return result, aliases
+    def get_distinct(self):
+        """
+        Builds the quoted "alias.column" references that go into the
+        DISTINCT ON part of the query, one per entry in query.distinct_fields.
+        Resolving a field name sets up joins on the query, so this has to be
+        called before get_from_clause().
+        """
+        quote_alias = self.quote_name_unless_alias
+        quote_column = self.connection.ops.quote_name
+        distinct_columns = []
+        meta = self.query.model._meta
+        for field_name in self.query.distinct_fields:
+            lookup_parts = field_name.split(LOOKUP_SEP)
+            _, column, table_alias, _, _ = self._setup_joins(lookup_parts, meta, None)
+            column, table_alias = self._final_join_removal(column, table_alias)
+            distinct_columns.append(
+                "%s.%s" % (quote_alias(table_alias), quote_column(column)))
+        return distinct_columns
     def get_ordering(self):
         """
         Returns a tuple containing a list representing the SQL elements in the
         "order by" clause, and the list of SQL elements that need to be added
         to the GROUP BY clause as a result of the ordering.
         Also sets the ordering_aliases attribute on this instance to a list of
         extra aliases needed in the select.
 …
             already_seen=None):
         """
         Returns the table alias (the name might be ambiguous, the alias will
         not be) and column name for ordering by the given 'name' parameter.
         The 'name' is of the form 'field1__field2__...__fieldN'.
         """
         name, order = get_order_dir(name, default_order)
         pieces = name.split(LOOKUP_SEP)
-        if not alias:
-            alias = self.query.get_initial_alias()
-        field, target, opts, joins, last, extra = self.query.setup_joins(pieces,
-                opts, alias, False)
-        alias = joins[-1]
-        col = target.column
-        if not field.rel:
-            # To avoid inadvertent trimming of a necessary alias, use the
-            # refcount to show that we are referencing a non-relation field on
-            # the model.
-            self.query.ref_alias(alias)
-        # Must use left outer joins for nullable fields and their relations.
-        self.query.promote_alias_chain(joins,
-            self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
+        field, col, alias, joins, opts = self._setup_joins(pieces, opts, alias)
         # If we get to this point and the field is a relation to another model,
         # append the default ordering for that model.
         if field.rel and len(joins) > 1 and opts.ordering:
             # Firstly, avoid infinite loops.
             if not already_seen:
                 already_seen = set()
             join_tuple = tuple([self.query.alias_map[j][TABLE_NAME] for j in joins])
 …
                 raise FieldError('Infinite loop caused by ordering.')
             already_seen.add(join_tuple)
             results = []
             for item in opts.ordering:
                 results.extend(self.find_ordering_name(item, opts, alias,
                         order, already_seen))
             return results
+        col, alias = self._final_join_removal(col, alias)
+        return [(alias, col, order)]
+    def _setup_joins(self, pieces, opts, alias):
+        """
+        Shared join setup for ordering and distinct lookups: runs
+        query.setup_joins for the given lookup pieces, takes a reference on
+        the final alias when the field is not a relation, and promotes the
+        join chain. Returns (field, column, alias, joins, opts).
+        Ordering and distinct have to resolve the same input to the same
+        target columns, because the leading ORDER BY and DISTINCT ON
+        expressions must match; executing SQL where they differ is an error.
+        """
+        query = self.query
+        alias = alias or query.get_initial_alias()
+        field, target, opts, joins, _, _ = query.setup_joins(
+            pieces, opts, alias, False)
+        alias = joins[-1]
+        if not field.rel:
+            # Referencing a non-relation field on the model: bump the
+            # refcount so that this alias is not trimmed inadvertently.
+            query.ref_alias(alias)
+        # Nullable fields and their relations need left outer joins. Ordering
+        # or distinct must not change the returned set, which INNER JOINs on
+        # nullable fields could do.
+        first_join_is_outer = (
+            query.alias_map[joins[0]][JOIN_TYPE] == query.LOUTER)
+        query.promote_alias_chain(joins, first_join_is_outer)
+        return field, target.column, alias, joins, opts
+    def _final_join_removal(self, col, alias):
+        """
+        A helper method for get_distinct and get_ordering. This method will
+        trim extra not-needed joins from the tail of the join chain and
+        return the resulting (col, alias) pair.
+        This is very similar to what is done in trim_joins, but we will
+        trim LEFT JOINS here. It would be a good idea to consolidate this
+        method and query.trim_joins().
+        """
         if alias:
-            # We have to do the same "final join" optimisation as in
-            # add_filter, since the final column might not otherwise be part of
-            # the select set (so we can't order on it).
+            # While the wanted column is the join column of the last join,
+            # drop that join and read the column from its left-hand side.
             while 1:
                 join = self.query.alias_map[alias]
                 if col != join[RHS_JOIN_COL]:
                     break
                 self.query.unref_alias(alias)
                 alias = join[LHS_ALIAS]
                 col = join[LHS_JOIN_COL]
-        return [(alias, col, order)]
+        return col, alias
     def get_from_clause(self):
         """
         Returns a list of strings that are joined together to go after the
         "FROM" part of the query, as well as a list any extra parameters that
         need to be included. Sub-classes, can override this to create a
         from-clause via a "select".
         This should only be called after any SQL construction methods that
-        might change the tables we need. This means the select columns and
-        ordering must be done first.
+        might change the tables we need. This means the select columns,
+        ordering and distinct must be done first.
         """
         result = []
         qn = self.quote_name_unless_alias
         qn2 = self.connection.ops.quote_name
         first = True
         for alias in self.query.tables:
             if not self.query.alias_refcount[alias]:
                 continue
 …
 class SQLAggregateCompiler(SQLCompiler):
     def as_sql(self, qn=None):
         """
         Creates the SQL for this query. Returns the SQL string and list of
         parameters.
         """
         if qn is None:
             qn = self.quote_name_unless_alias
         sql = ('SELECT %s FROM (%s) subquery' % (
             ', '.join([
                 aggregate.as_sql(qn, self.connection)
                 for aggregate in self.query.aggregate_select.values()
             ]),
             self.query.subquery)
+        )
         params = self.query.sub_params

django/db/models/sql/query.py

diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py

-              a
         self.tables = []    # Aliases in the order they are created.
         self.where = where()
         self.where_class = where
         self.group_by = None
         self.having = where()
         self.order_by = []
         self.low_mark, self.high_mark = 0, None  # Used for offset/limit
         self.distinct = False
+        self.distinct_fields = []
         self.select_for_update = False
         self.select_for_update_nowait = False
         self.select_related = False
         self.related_select_cols = []
         # SQL aggregate-related attributes
         self.aggregates = SortedDict() # Maps alias -> SQL aggregate function
         self.aggregate_select_mask = None
 …
         if self.group_by is None:
             obj.group_by = None
         else:
             obj.group_by = self.group_by[:]
         obj.having = copy.deepcopy(self.having, memo=memo)
         obj.order_by = self.order_by[:]
         obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
         obj.distinct = self.distinct
+        obj.distinct_fields = self.distinct_fields[:]
         obj.select_for_update = self.select_for_update
         obj.select_for_update_nowait = self.select_for_update_nowait
         obj.select_related = self.select_related
         obj.related_select_cols = []
         obj.aggregates = copy.deepcopy(self.aggregates, memo=memo)
         if self.aggregate_select_mask is None:
             obj.aggregate_select_mask = None
         else:
 …
         obj.extra_tables = self.extra_tables
         obj.extra_order_by = self.extra_order_by
         obj.deferred_loading = copy.deepcopy(self.deferred_loading, memo=memo)
         if self.filter_is_sticky and self.used_aliases:
             obj.used_aliases = self.used_aliases.copy()
         else:
             obj.used_aliases = set()
         obj.filter_is_sticky = False
         obj.__dict__.update(kwargs)
         if hasattr(obj, '_setup_query'):
             obj._setup_query()
         return obj
     def convert_values(self, value, field, connection):
         """Convert the database-returned value into a type that is consistent
         across database backends.
 …
             in zip(query.aggregate_select.items(), result)
         ])
     def get_count(self, using):
         """
         Performs a COUNT() query using the current filter constraints.
         """
         obj = self.clone()
-        if len(self.select) > 1 or self.aggregate_select:
+        if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields):
             # If a select clause exists, then the query has already started to
             # specify the columns that are to be returned.
             # In this case, we need to use a subquery to evaluate the count.
             from django.db.models.sql.subqueries import AggregateQuery
             subquery = obj
             subquery.clear_ordering(True)
             subquery.clear_limits()
 …
         'rhs' query.
         """
         assert self.model == rhs.model, \
                 "Cannot combine queries on two different base models."
         assert self.can_filter(), \
                 "Cannot combine queries once a slice has been taken."
         assert self.distinct == rhs.distinct, \
             "Cannot combine a unique query with a non-unique query."
+        assert self.distinct_fields == rhs.distinct_fields, \
+            "Cannot combine queries with different distinct fields."
         self.remove_inherited_models()
         # Work out how to relabel the rhs aliases, if necessary.
         change_map = {}
         used = set()
         conjunction = (connector == AND)
         first = True
         for alias in rhs.tables:
 …
         self.alias_refcount[alias] = 1
         self.tables.append(alias)
         return alias, True
     def ref_alias(self, alias):
         """ Increases the reference count for this alias. """
         self.alias_refcount[alias] += 1
     def unref_alias(self, alias):
+    def unref_alias(self, alias, amount=1):
         """ Decreases the reference count for this alias. """
         self.alias_refcount[alias] -= 1
+        self.alias_refcount[alias] -= amount
     def promote_alias(self, alias, unconditional=False):
         """
         Promotes the join type of an alias to an outer join if it's possible
         for the join to contain NULL values on the left. If 'unconditional' is
         False, the join is only promoted if it is nullable, otherwise it is
         always promoted.
 …
         Walks along a chain of aliases, promoting the first nullable join and
         any joins following that. If 'must_promote' is True, all the aliases in
         the chain are promoted.
         """
         for alias in chain:
             if self.promote_alias(alias, must_promote):
                 must_promote = True
+    def reset_refcounts(self, to_counts):
+        """
+        Brings the reference count of every alias back to the value recorded
+        in to_counts. Aliases missing from to_counts are taken to have a
+        count of zero.
+        """
+        # Work from a snapshot, as unref_alias() changes alias_refcount.
+        snapshot = self.alias_refcount.copy()
+        for alias in snapshot:
+            surplus = snapshot[alias] - to_counts.get(alias, 0)
+            self.unref_alias(alias, surplus)
     def promote_unused_aliases(self, initial_refcounts, used_aliases):
         """
         Given a "before" copy of the alias_refcounts dictionary (as
         'initial_refcounts') and a collection of aliases that may have been
         changed or created, works out which aliases have been created since
         then and which ones haven't been used and promotes all of those
         aliases, plus any children of theirs in the alias tree, to outer joins.
         """
 …
             self.ref_alias(alias)
         else:
             alias = self.join((None, self.model._meta.db_table, None, None))
         return alias
     def count_active_tables(self):
         """
         Returns the number of tables in this query with a non-zero reference
+        count.
+        count. Note that after execution, the reference counts are zeroed, so
+        tables added in compiler will not be seen by this method.
         """
         return len([1 for count in self.alias_refcount.itervalues() if count])
     def join(self, connection, always_create=False, exclusions=(),
             promote=False, outer_if_first=False, nullable=False, reuse=None):
         """
         Returns an alias for the join in 'connection', either reusing an
         existing alias for that join or creating a new one. 'connection' is a
 …
         """
         Clears the list of fields to select (but not extra_select columns).
         Some queryset types completely replace any existing list of select
         columns.
         """
         self.select = []
         self.select_fields = []
+    def add_distinct_fields(self, *field_names):
+        """
+        Sets the fields of the query's "distinct on" clause to the given
+        field names, replacing any previous ones, and marks the query as
+        distinct. The names are stored unresolved; the compiler turns them
+        into columns in get_distinct().
+        """
+        # Store a list, like the default value set in __init__, so that the
+        # equality check on distinct_fields in combine() does not fail just
+        # because an empty tuple is compared with an empty list.
+        self.distinct_fields = list(field_names)
+        self.distinct = True
     def add_fields(self, field_names, allow_m2m=True):
         """
         Adds the given (model) fields to the select set. The field names are
         added in the order specified.
         """
         alias = self.get_initial_alias()
         opts = self.get_meta()

docs/ref/models/querysets.txt

diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt

-              a
 a default ordering, or when using :meth:`order_by()`). If no such ordering is
 defined for a given ``QuerySet``, calling ``reverse()`` on it has no real
 effect (the ordering was undefined prior to calling ``reverse()``, and will
 remain undefined afterward).
 distinct
 ~~~~~~~~
-.. method:: distinct()
+.. method:: distinct([*fields])
 Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This
 eliminates duplicate rows from the query results.
 By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this
 is rarely a problem, because simple queries such as ``Blog.objects.all()``
 don't introduce the possibility of duplicate result rows. However, if your
 query spans multiple tables, it's possible to get duplicate results when a
 …
     selected, the columns used in any :meth:`order_by()` (or default model
     ordering) will still be involved and may affect uniqueness of the results.
     The moral here is that if you are using ``distinct()`` be careful about
     ordering by related models. Similarly, when using ``distinct()`` and
     :meth:`values()` together, be careful when ordering by fields not in the
     :meth:`values()` call.
+.. versionadded:: 1.4
+The ability to pass positional arguments (``*fields``) is new in Django 1.4.
+They are the names of the fields to which the ``DISTINCT`` should be limited. This
+translates to a ``SELECT DISTINCT ON`` SQL query. A ``DISTINCT ON`` query eliminates
+duplicate rows not by comparing all fields in a row, but by comparing only the given
+fields.
+.. note::
+    Note that the ability to specify field names is only available in PostgreSQL.
+.. note::
+    When using the ``DISTINCT ON`` functionality, the columns given to
+    :meth:`distinct` must match the first :meth:`order_by` columns. For example,
+    ``SELECT DISTINCT ON (a)`` gives you the first row for each value in column
+    ``a``. If you don't specify an order, you'll get some arbitrary row.
+Examples::
+    >>> Author.objects.distinct()
+    [...]
+    >>> Entry.objects.order_by('pub_date').distinct('pub_date')
+    [...]
+    >>> Entry.objects.order_by('blog').distinct('blog')
+    [...]
+    >>> Entry.objects.order_by('author', 'pub_date').distinct('author', 'pub_date')
+    [...]
+    >>> Entry.objects.order_by('blog__name', 'mod_date').distinct('blog__name', 'mod_date')
+    [...]
+    >>> Entry.objects.order_by('author', 'pub_date').distinct('author')
+    [...]
 values
 ~~~~~~
 .. method:: values(*fields)
 Returns a ``ValuesQuerySet`` — a ``QuerySet`` subclass that returns
 dictionaries when used as an iterable, rather than model-instance objects.

new file tests/modeltests/distinct_on_fields/models.py

diff --git a/tests/modeltests/distinct_on_fields/__init__.py b/tests/modeltests/distinct_on_fields/__init__.py
new file mode 100644
diff --git a/tests/modeltests/distinct_on_fields/models.py b/tests/modeltests/distinct_on_fields/models.py
new file mode 100644

-              -
+from django.db import models
+class Tag(models.Model):
+    """A named tag that may point to a parent tag; ordered by name."""
+    name = models.CharField(max_length=10)
+    # Optional self-reference; the reverse accessor is 'children'.
+    parent = models.ForeignKey('self', blank=True, null=True,
+            related_name='children')
+    class Meta:
+        ordering = ['name']
+    def __unicode__(self):
+        return self.name
+class Celebrity(models.Model):
+    """A named celebrity with an optional, unique link to one Fan."""
+    name = models.CharField("Name", max_length=20)
+    # Nullable, so a celebrity can be created before any Fan exists.
+    greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
+    def __unicode__(self):
+        return self.name
+class Fan(models.Model):
+    """A fan of exactly one Celebrity."""
+    fan_of = models.ForeignKey(Celebrity)
+class Staff(models.Model):
+    """A staff member with an explicit integer primary key."""
+    id = models.IntegerField(primary_key=True)
+    name = models.CharField(max_length=50)
+    organisation = models.CharField(max_length=100)
+    # Tags are attached through the explicit StaffTag model.
+    tags = models.ManyToManyField(Tag, through='StaffTag')
+    coworkers = models.ManyToManyField('self')
+    def __unicode__(self):
+        return self.name
+class StaffTag(models.Model):
+    """Through model linking a Staff member to a Tag."""
+    staff = models.ForeignKey(Staff)
+    tag = models.ForeignKey(Tag)
+    def __unicode__(self):
+        # Rendered as "<tag> -> <staff>".
+        return u"%s -> %s" % (self.tag, self.staff)

new file tests/modeltests/distinct_on_fields/tests.py

diff --git a/tests/modeltests/distinct_on_fields/tests.py b/tests/modeltests/distinct_on_fields/tests.py
new file mode 100644

-              -
+from __future__ import absolute_import
+from django.db.models import Max
+from django.test import TestCase, skipUnlessDBFeature
+from .models import Tag, Celebrity, Fan, Staff, StaffTag
+class DistinctOnTests(TestCase):
+    def setUp(self):
+        """Create the tags, staff, celebrities and fans the tests query."""
+        # Tag tree: t1 is the root, t2 and t3 are its children, and t4 and
+        # t5 are children of t3.
+        t1 = Tag.objects.create(name='t1')
+        t2 = Tag.objects.create(name='t2', parent=t1)
+        t3 = Tag.objects.create(name='t3', parent=t1)
+        t4 = Tag.objects.create(name='t4', parent=t3)
+        t5 = Tag.objects.create(name='t5', parent=t3)
+        # The name "p1" is used in both organisations (ids 1 and 4), so
+        # distinct on name and distinct on (name, organisation) differ.
+        p1_o1 = Staff.objects.create(id=1, name="p1", organisation="o1")
+        p2_o1 = Staff.objects.create(id=2, name="p2", organisation="o1")
+        p3_o1 = Staff.objects.create(id=3, name="p3", organisation="o1")
+        p1_o2 = Staff.objects.create(id=4, name="p1", organisation="o2")
+        p1_o1.coworkers.add(p2_o1, p3_o1)
+        # The same (staff, tag) pair is created twice so that there are
+        # duplicate rows to collapse with distinct('staff', 'tag').
+        StaffTag.objects.create(staff=p1_o1, tag=t1)
+        StaffTag.objects.create(staff=p1_o1, tag=t1)
+        celeb1 = Celebrity.objects.create(name="c1")
+        celeb2 = Celebrity.objects.create(name="c2")
+        # celeb1 has two fans and celeb2 has one.
+        self.fan1 = Fan.objects.create(fan_of=celeb1)
+        self.fan2 = Fan.objects.create(fan_of=celeb1)
+        self.fan3 = Fan.objects.create(fan_of=celeb2)
+    @skipUnlessDBFeature('can_distinct_on_fields')
+    def test_basic_distinct_on(self):
+        """QuerySet.distinct('field', ...) works"""
+        # (qset, expected) tuples
+        qsets = (
+            (
+                Staff.objects.distinct().order_by('name'),
+                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
+            ),
+            (
+                Staff.objects.distinct('name').order_by('name'),
+                ['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
+            ),
+            (
+                Staff.objects.distinct('organisation').order_by('organisation', 'name'),
+                ['<Staff: p1>', '<Staff: p1>'],
+            ),
+            (
+                Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'),
+                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
+            ),
+            (
+                Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\
+                    distinct('name').order_by('name'),
+                ['<Celebrity: c1>', '<Celebrity: c2>'],
+            ),
+            # Does combining querysets work?
+            (
+                (Celebrity.objects.filter(fan__in=[self.fan1, self.fan2]).\
+                    distinct('name').order_by('name')
+                |Celebrity.objects.filter(fan__in=[self.fan3]).\
+                    distinct('name').order_by('name')),
+                ['<Celebrity: c1>', '<Celebrity: c2>'],
+            ),
+            (
+                StaffTag.objects.distinct('staff','tag'),
+                ['<StaffTag: t1 -> p1>'],
+            ),
+            (
+                Tag.objects.order_by('parent__pk', 'pk').distinct('parent'),
+                ['<Tag: t2>', '<Tag: t4>', '<Tag: t1>'],
+            ),
+            (
+                StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'),
+                ['<StaffTag: t1 -> p1>'],
+            ),
+            # Fetch the alphabetically first coworker for each worker
+            (
+                (Staff.objects.distinct('id').order_by('id', 'coworkers__name').
+                               values_list('id', 'coworkers__name')),
+                ["(1, u'p2')", "(2, u'p1')", "(3, u'p1')", "(4, None)"]
+            ),
+        )
+        for qset, expected in qsets:
+            self.assertQuerysetEqual(qset, expected)
+            self.assertEqual(qset.count(), len(expected))
+        # Combining queries with different distinct_fields is not allowed.
+        base_qs = Celebrity.objects.all()
+        self.assertRaisesMessage(
+            AssertionError,
+            "Cannot combine queries with different distinct fields.",
+            lambda: (base_qs.distinct('id') & base_qs.distinct('name'))
+        )
+        # Test join unreffing
+        c1 = Celebrity.objects.distinct('greatest_fan__id', 'greatest_fan__fan_of')
+        self.assertIn('OUTER JOIN', str(c1.query))
+        c2 = c1.distinct('pk')
+        self.assertNotIn('OUTER JOIN', str(c2.query))
+    @skipUnlessDBFeature('can_distinct_on_fields')
+    def test_distinct_not_implemented_checks(self):
+        # distinct + annotate not allowed
+        with self.assertRaises(NotImplementedError):
+            Celebrity.objects.annotate(Max('id')).distinct('id')[0]
+        with self.assertRaises(NotImplementedError):
+            Celebrity.objects.distinct('id').annotate(Max('id'))[0]
+        # However this check is done only when the query executes, so you
+        # can use distinct() to remove the fields before execution.
+        Celebrity.objects.distinct('id').annotate(Max('id')).distinct()[0]
+        # distinct + aggregate not allowed
+        with self.assertRaises(NotImplementedError):
+            Celebrity.objects.distinct('id').aggregate(Max('id'))

tests/regressiontests/queries/models.py

diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py

-              a
 # An inter-related setup with a model subclass that has a nullable
 # path to another model, and a return path from that model.
 class Celebrity(models.Model):
     name = models.CharField("Name", max_length=20)
     greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
+    def __unicode__(self):
+        return self.name
 class TvChef(Celebrity):
     pass
 class Fan(models.Model):
     fan_of = models.ForeignKey(Celebrity)
 # Multiple foreign keys
 class LeafA(models.Model):
 …
         return "category item: " + str(self.category)
 class OneToOneCategory(models.Model):
     new_name = models.CharField(max_length=15)
     category = models.OneToOneField(SimpleCategory)
     def __unicode__(self):
         return "one2one " + self.new_name

tests/regressiontests/queries/tests.py

diff --git a/tests/regressiontests/queries/tests.py b/tests/regressiontests/queries/tests.py

-              a
         q1 = Item.objects.filter(tags=self.t1)
         q2 = Item.objects.filter(note=self.n3, tags=self.t2)
         q3 = Item.objects.filter(creator=self.a4)
         self.assertQuerysetEqual(
             ((q1 & q2) | q3).order_by('name'),
             ['<Item: four>', '<Item: one>']
+        )
-    # FIXME: This is difficult to fix and very much an edge case, so punt for
-    # now.  This is related to the order_by() tests for ticket #2253, but the
-    # old bug exhibited itself here (q2 was pulling too many tables into the
-    # combined query with the new ordering, but only because we have evaluated
-    # q2 already).
-    @unittest.expectedFailure
     def test_order_by_tables(self):
         q1 = Item.objects.order_by('name')
         q2 = Item.objects.filter(id=self.i1.id)
         list(q2)
         self.assertEqual(len((q1 & q2).order_by('name').query.tables), 1)
+    def test_order_by_join_unref(self):
+        """
+        This test is related to the above one, testing that there aren't
+        old JOINs in the query.
+        """
+        qs = Celebrity.objects.order_by('greatest_fan__fan_of')
+        self.assertIn('OUTER JOIN', str(qs.query))
+        qs = qs.order_by('id')
+        self.assertNotIn('OUTER JOIN', str(qs.query))
     def test_tickets_4088_4306(self):
         self.assertQuerysetEqual(
             Report.objects.filter(creator=1001),
             ['<Report: r1>']
+        )
         self.assertQuerysetEqual(
             Report.objects.filter(creator__num=1001),
             ['<Report: r1>']
 …
         self.assertEqual(
             list(Node.objects.filter(node=node2)),
             [node1]
+        )
 class ConditionalTests(BaseQuerysetTest):
-    """Tests whose execution depend on dfferent environment conditions like
+    """Tests whose execution depend on different environment conditions like
     Python version or DB backend features"""
     def setUp(self):
         generic = NamedCategory.objects.create(name="Generic")
         t1 = Tag.objects.create(name='t1', category=generic)
         t2 = Tag.objects.create(name='t2', parent=t1, category=generic)
         t3 = Tag.objects.create(name='t3', parent=t1)
         t4 = Tag.objects.create(name='t4', parent=t3)
         t5 = Tag.objects.create(name='t5', parent=t3)
     # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed
     # (Python issue 1242657), so these cases return an empty list, rather than
     # raising an exception. Not a lot we can do about that, unfortunately, due to
     # the way Python handles list() calls internally. Thus, we skip the tests for
     # Python 2.6.
     @unittest.skipIf(sys.version_info[:2] == (2, 6), "Python version is 2.6")
     def test_infinite_loop(self):
         # If you're not careful, it's possible to introduce infinite loops via
 …
     # Sqlite 3 does not support passing in more than 1000 parameters except by
     # changing a parameter at compilation time.
     @skipUnlessDBFeature('supports_1000_query_parameters')
     def test_ticket14244(self):
         # Test that the "in" lookup works with lists of 1000 items or more.
         Number.objects.all().delete()
         numbers = range(2500)
+        # Speed up object creation by using bulk_create. However keep the
+        # batches somewhat small - otherwise we might hit the parameter limit
+        # in bulk_create.
+        batch = []
         for num in numbers:
-            _ = Number.objects.create(num=num)
+            batch.append(Number(num=num))
+            if len(batch) == 100:
+                Number.objects.bulk_create(batch)
+                batch = []
+        if batch:
+            Number.objects.bulk_create(batch)
         self.assertEqual(
             Number.objects.filter(num__in=numbers[:1000]).count(),
+        )
         self.assertEqual(
             Number.objects.filter(num__in=numbers[:1001]).count(),
+        )
 …
             Number.objects.filter(num__in=numbers[:2000]).count(),
+        )
         self.assertEqual(
             Number.objects.filter(num__in=numbers).count(),
+        )
 class UnionTests(unittest.TestCase):
     """
     Tests for the union of two querysets. Bug #12252.
     """
     def setUp(self):
         objectas = []
         objectbs = []
         objectcs = []

tests/regressiontests/select_related_regress/tests.py

diff --git a/tests/regressiontests/select_related_regress/tests.py b/tests/regressiontests/select_related_regress/tests.py

-              a
         connections=Connection.objects.filter(start__device__building=b, end__device__building=b).order_by('id')
         self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections],
             [(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')])
         connections=Connection.objects.filter(start__device__building=b, end__device__building=b).select_related().order_by('id')
         self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections],
             [(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')])
-        # This final query should only join seven tables (port, device and building
-        # twice each, plus connection once).
-        self.assertEqual(connections.query.count_active_tables(), 7)
+        # This final query should only have seven tables (port, device and building
+        # twice each, plus connection once). Thus, 6 joins plus the FROM table.
+        self.assertEqual(str(connections.query).count(" JOIN "), 6)
     def test_regression_8106(self):
         """
         Regression test for bug #8106.
         Same sort of problem as the previous test, but this time there are
         more extra tables to pull in as part of the select_related() and some

Download in other formats:

Original Format

Issues

Context Navigation

Ticket #6422: distinct_on.13.diff

AUTHORS

django/db/backends/__init__.py

django/db/backends/postgresql_psycopg2/base.py

django/db/backends/postgresql_psycopg2/operations.py

django/db/models/query.py

django/db/models/sql/compiler.py

django/db/models/sql/query.py

docs/ref/models/querysets.txt

new file tests/modeltests/distinct_on_fields/models.py

new file tests/modeltests/distinct_on_fields/tests.py

tests/regressiontests/queries/models.py

tests/regressiontests/queries/tests.py

tests/regressiontests/select_related_regress/tests.py

Download in other formats:

Django Links

Learn More

Get Involved

Get Help

Follow Us

Support Us