Ticket #6422: distinct_on.13.diff

File distinct_on.13.diff, 42.5 KB (added by Jeffrey Gelens, 9 years ago)
  • AUTHORS

    diff --git a/AUTHORS b/AUTHORS
    a b  
    198198    Vincent Foley <vfoleybourgon@yahoo.ca>
    199199    Alcides Fonseca
    200200    Rudolph Froger <rfroger@estrate.nl>
    201201    Jorge Gajon <gajon@gajon.org>
    202202    gandalf@owca.info
    203203    Marc Garcia <marc.garcia@accopensys.com>
    204204    Andy Gayton <andy-django@thecablelounge.com>
    205205    geber@datacollect.com
     206    Jeffrey Gelens <jeffrey@gelens.org>
    206207    Baishampayan Ghose
    207208    Joshua Ginsberg <jag@flowtheory.net>
    208209    Dimitris Glezos <dimitris@glezos.com>
    209210    glin@seznam.cz
    210211    martin.glueck@gmail.com
    211212    Artyom Gnilov <boobsd@gmail.com>
    212213    Ben Godfrey <http://aftnn.org>
    213214    GomoX <gomo@datafull.com>
  • django/db/backends/__init__.py

    diff --git a/django/db/backends/__init__.py b/django/db/backends/__init__.py
    a b  
    374374
    375375    # Features that need to be confirmed at runtime
    376376    # Cache whether the confirmation has been performed.
    377377    _confirmed = False
    378378    supports_transactions = None
    379379    supports_stddev = None
    380380    can_introspect_foreign_keys = None
    381381
     382    # Support for the DISTINCT ON clause
     383    can_distinct_on_fields = False
     384
    382385    def __init__(self, connection):
    383386        self.connection = connection
    384387
    385388    def confirm(self):
    386389        "Perform manual checks of any database features that might vary between installs"
    387390        self._confirmed = True
    388391        self.supports_transactions = self._supports_transactions()
    389392        self.supports_stddev = self._supports_stddev()
     
    527530    def fulltext_search_sql(self, field_name):
    528531        """
    529532        Returns the SQL WHERE clause to use in order to perform a full-text
    530533        search of the given field_name. Note that the resulting string should
    531534        contain a '%s' placeholder for the value being searched against.
    532535        """
    533536        raise NotImplementedError('Full-text search is not implemented for this database backend')
    534537
     538    def distinct(self, fields):
     539        """
     540        Returns an SQL DISTINCT clause which removes duplicate rows from the
     541        result set. If any fields are given, only the given fields are being
     542        checked for duplicates.
     543        """
     544        if fields:
     545            raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
     546        else:
     547            return 'DISTINCT'
     548
    535549    def last_executed_query(self, cursor, sql, params):
    536550        """
    537551        Returns a string of the query last executed by the given cursor, with
    538552        placeholders replaced with actual values.
    539553
    540554        `sql` is the raw query containing placeholders, and `params` is the
    541555        sequence of parameters. These are used by default, but this method
    542556        exists for database backends to provide a better implementation
  • django/db/backends/postgresql_psycopg2/base.py

    diff --git a/django/db/backends/postgresql_psycopg2/base.py b/django/db/backends/postgresql_psycopg2/base.py
    a b  
    7777    can_return_id_from_insert = True
    7878    requires_rollback_on_dirty_transaction = True
    7979    has_real_datatype = True
    8080    can_defer_constraint_checks = True
    8181    has_select_for_update = True
    8282    has_select_for_update_nowait = True
    8383    has_bulk_insert = True
    8484    supports_tablespaces = True
     85    can_distinct_on_fields = True
    8586
    8687class DatabaseWrapper(BaseDatabaseWrapper):
    8788    vendor = 'postgresql'
    8889    operators = {
    8990        'exact': '= %s',
    9091        'iexact': '= UPPER(%s)',
    9192        'contains': 'LIKE %s',
    9293        'icontains': 'LIKE UPPER(%s)',
  • django/db/backends/postgresql_psycopg2/operations.py

    diff --git a/django/db/backends/postgresql_psycopg2/operations.py b/django/db/backends/postgresql_psycopg2/operations.py
    a b  
    174174        macro in src/include/pg_config_manual.h .
    175175
    176176        This implementation simply returns 63, but can easily be overridden by a
    177177        custom database backend that inherits most of its behavior from this one.
    178178        """
    179179
    180180        return 63
    181181
     182    def distinct(self, fields):
     183        if fields:
     184            return 'DISTINCT ON (%s)' % ', '.join(fields)
     185        else:
     186            return 'DISTINCT'
     187
    182188    def last_executed_query(self, cursor, sql, params):
    183189        # http://initd.org/psycopg/docs/cursor.html#cursor.query
    184190        # The query attribute is a Psycopg extension to the DB API 2.0.
    185191        return cursor.query
    186192
    187193    def return_insert_id(self):
    188194        return "RETURNING %s", ()
    189195
  • django/db/models/query.py

    diff --git a/django/db/models/query.py b/django/db/models/query.py
    a b  
    318318    def aggregate(self, *args, **kwargs):
    319319        """
    320320        Returns a dictionary containing the calculations (aggregation)
    321321        over the current queryset
    322322
    323323        If args is present the expression is passed as a kwarg using
    324324        the Aggregate object's default alias.
    325325        """
     326        if self.query.distinct_fields:
     327            raise NotImplementedError("aggregate() + distinct(fields) not implemented.")
    326328        for arg in args:
    327329            kwargs[arg.default_alias] = arg
    328330
    329331        query = self.query.clone()
    330332
    331333        for (alias, aggregate_expr) in kwargs.items():
    332334            query.add_aggregate(aggregate_expr, self.model, alias,
    333335                is_summary=True)
     
    746748        """
    747749        assert self.query.can_filter(), \
    748750                "Cannot reorder a query once a slice has been taken."
    749751        obj = self._clone()
    750752        obj.query.clear_ordering()
    751753        obj.query.add_ordering(*field_names)
    752754        return obj
    753755
    754     def distinct(self, true_or_false=True):
     756    def distinct(self, *field_names):
    755757        """
    756758        Returns a new QuerySet instance that will select only distinct results.
    757759        """
     760        assert self.query.can_filter(), \
     761                "Cannot create distinct fields once a slice has been taken."
    758762        obj = self._clone()
    759         obj.query.distinct = true_or_false
     763        obj.query.add_distinct_fields(*field_names)
    760764        return obj
    761765
    762766    def extra(self, select=None, where=None, params=None, tables=None,
    763767              order_by=None, select_params=None):
    764768        """
    765769        Adds extra SQL fragments to the query.
    766770        """
    767771        assert self.query.can_filter(), \
     
    11741178        return self
    11751179
    11761180    def order_by(self, *field_names):
    11771181        """
    11781182        Always returns EmptyQuerySet.
    11791183        """
    11801184        return self
    11811185
    1182     def distinct(self, true_or_false=True):
     1186    def distinct(self, fields=None):
    11831187        """
    11841188        Always returns EmptyQuerySet.
    11851189        """
    11861190        return self
    11871191
    11881192    def extra(self, select=None, where=None, params=None, tables=None,
    11891193              order_by=None, select_params=None):
    11901194        """
  • django/db/models/sql/compiler.py

    diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py
    a b  
    1818        self.using = using
    1919        self.quote_cache = {}
    2020
    2121    def pre_sql_setup(self):
    2222        """
    2323        Does any necessary class setup immediately prior to producing SQL. This
    2424        is for things that can't necessarily be done in __init__ because we
    2525        might not have all the pieces in place at that time.
     26        # TODO: after the query has been executed, the altered state should be
     27        # cleaned. We are not using a clone() of the query here.
    2628        """
    2729        if not self.query.tables:
    2830            self.query.join((None, self.query.model._meta.db_table, None, None))
    2931        if (not self.query.select and self.query.default_cols and not
    3032                self.query.included_inherited_models):
    3133            self.query.setup_inherited_models()
    3234        if self.query.select_related and not self.query.related_select_cols:
    3335            self.fill_related_selections()
     
    5557
    5658        If 'with_limits' is False, any limit/offset information is not included
    5759        in the query.
    5860        """
    5961        if with_limits and self.query.low_mark == self.query.high_mark:
    6062            return '', ()
    6163
    6264        self.pre_sql_setup()
     65        # Building the SQL below may add joins to the query, and those must
     66        # be removed again afterwards. So, take note of the alias reference
     67        # counts now. The snapshot is taken after pre_sql_setup() on purpose:
     68        # we do not want to undo its work, as pre_sql_setup() modifies query
     69        # state in a way that forbids another run of it.
     70        self.refcounts_before = self.query.alias_refcount.copy()
    6371        out_cols = self.get_columns(with_col_aliases)
    6472        ordering, ordering_group_by = self.get_ordering()
    6573
    66         # This must come after 'select' and 'ordering' -- see docstring of
    67         # get_from_clause() for details.
     74        distinct_fields = self.get_distinct()
     75
     76        # This must come after 'select', 'ordering' and 'distinct' -- see
     77        # docstring of get_from_clause() for details.
    6878        from_, f_params = self.get_from_clause()
    6979
    7080        qn = self.quote_name_unless_alias
    7181
    7282        where, w_params = self.query.where.as_sql(qn=qn, connection=self.connection)
    7383        having, h_params = self.query.having.as_sql(qn=qn, connection=self.connection)
    7484        params = []
    7585        for val in self.query.extra_select.itervalues():
    7686            params.extend(val[1])
    7787
    7888        result = ['SELECT']
     89
    7990        if self.query.distinct:
    80             result.append('DISTINCT')
     91            result.append(self.connection.ops.distinct(distinct_fields))
     92
    8193        result.append(', '.join(out_cols + self.query.ordering_aliases))
    8294
    8395        result.append('FROM')
    8496        result.extend(from_)
    8597        params.extend(f_params)
    8698
    8799        if where:
    88100            result.append('WHERE %s' % where)
    89101            params.extend(w_params)
    90102
    91103        grouping, gb_params = self.get_grouping()
    92104        if grouping:
     105            if distinct_fields:
     106                raise NotImplementedError(
     107                    "annotate() + distinct(fields) not implemented.")
    93108            if ordering:
    94109                # If the backend can't group by PK (i.e., any database
    95110                # other than MySQL), then any fields mentioned in the
    96111                # ordering clause needs to be in the group by clause.
    97112                if not self.connection.features.allows_group_by_pk:
    98113                    for col, col_params in ordering_group_by:
    99114                        if col not in grouping:
    100115                            grouping.append(str(col))
     
    124139        if self.query.select_for_update and self.connection.features.has_select_for_update:
    125140            # If we've been asked for a NOWAIT query but the backend does not support it,
    126141            # raise a DatabaseError otherwise we could get an unexpected deadlock.
    127142            nowait = self.query.select_for_update_nowait
    128143            if nowait and not self.connection.features.has_select_for_update_nowait:
    129144                raise DatabaseError('NOWAIT is not supported on this database backend.')
    130145            result.append(self.connection.ops.for_update_sql(nowait=nowait))
    131146
     147        # Finally do cleanup - get rid of the joins we created above.
     148        self.query.reset_refcounts(self.refcounts_before)
     149
    132150        return ' '.join(result), tuple(params)
    133151
    134152    def as_nested_sql(self):
    135153        """
    136154        Perform the same functionality as the as_sql() method, returning an
    137155        SQL string and parameters. However, the alias prefixes are bumped
    138156        beforehand (in a copy -- the current query isn't changed), and any
    139157        ordering is removed if the query is unsliced.
     
    287305            else:
    288306                r = '%s.%s' % (qn(alias), qn2(field.column))
    289307                result.append(r)
    290308                aliases.add(r)
    291309                if with_aliases:
    292310                    col_aliases.add(field.column)
    293311        return result, aliases
    294312
     313    def get_distinct(self):
     314        """
     315        Returns a quoted list of fields to use in DISTINCT ON part of the query.
     316
     317        Note that this method can alter the tables in the query, and thus this
     318        must be called before get_from_clause().
     319        """
     320        qn = self.quote_name_unless_alias
     321        qn2 = self.connection.ops.quote_name
     322        result = []
     323        opts = self.query.model._meta
     324
     325        for name in self.query.distinct_fields:
     326            parts = name.split(LOOKUP_SEP)
     327            field, col, alias, _, _ = self._setup_joins(parts, opts, None)
     328            col, alias = self._final_join_removal(col, alias)
     329            result.append("%s.%s" % (qn(alias), qn2(col)))
     330        return result
     331
     332
    295333    def get_ordering(self):
    296334        """
    297335        Returns a tuple containing a list representing the SQL elements in the
    298336        "order by" clause, and the list of SQL elements that need to be added
    299337        to the GROUP BY clause as a result of the ordering.
    300338
    301339        Also sets the ordering_aliases attribute on this instance to a list of
    302340        extra aliases needed in the select.
     
    379417            already_seen=None):
    380418        """
    381419        Returns the table alias (the name might be ambiguous, the alias will
    382420        not be) and column name for ordering by the given 'name' parameter.
    383421        The 'name' is of the form 'field1__field2__...__fieldN'.
    384422        """
    385423        name, order = get_order_dir(name, default_order)
    386424        pieces = name.split(LOOKUP_SEP)
    387         if not alias:
    388             alias = self.query.get_initial_alias()
    389         field, target, opts, joins, last, extra = self.query.setup_joins(pieces,
    390                 opts, alias, False)
    391         alias = joins[-1]
    392         col = target.column
    393         if not field.rel:
    394             # To avoid inadvertent trimming of a necessary alias, use the
    395             # refcount to show that we are referencing a non-relation field on
    396             # the model.
    397             self.query.ref_alias(alias)
    398 
    399         # Must use left outer joins for nullable fields and their relations.
    400         self.query.promote_alias_chain(joins,
    401             self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
     425        field, col, alias, joins, opts = self._setup_joins(pieces, opts, alias)
    402426
    403427        # If we get to this point and the field is a relation to another model,
    404428        # append the default ordering for that model.
    405429        if field.rel and len(joins) > 1 and opts.ordering:
    406430            # Firstly, avoid infinite loops.
    407431            if not already_seen:
    408432                already_seen = set()
    409433            join_tuple = tuple([self.query.alias_map[j][TABLE_NAME] for j in joins])
     
    411435                raise FieldError('Infinite loop caused by ordering.')
    412436            already_seen.add(join_tuple)
    413437
    414438            results = []
    415439            for item in opts.ordering:
    416440                results.extend(self.find_ordering_name(item, opts, alias,
    417441                        order, already_seen))
    418442            return results
     443        col, alias = self._final_join_removal(col, alias)
     444        return [(alias, col, order)]
    419445
     446    def _setup_joins(self, pieces, opts, alias):
     447        """
     448        A helper method for get_ordering and get_distinct. This method will
     449        call query.setup_joins, handle refcounts and then promote the joins.
     450
     451        Note that get_ordering and get_distinct must produce the same target
     452        columns for the same input, because the DISTINCT ON columns must match
     453        the leading ORDER BY columns. Executing SQL where this is not true is an error.
     454        """
     455        if not alias:
     456            alias = self.query.get_initial_alias()
     457        field, target, opts, joins, _, _ = self.query.setup_joins(pieces,
     458                opts, alias, False)
     459        alias = joins[-1]
     460        col = target.column
     461        if not field.rel:
     462            # To avoid inadvertent trimming of a necessary alias, use the
     463            # refcount to show that we are referencing a non-relation field on
     464            # the model.
     465            self.query.ref_alias(alias)
     466
     467        # Must use left outer joins for nullable fields and their relations.
     468        # Ordering or distinct must not affect the returned set, and INNER
     469        # JOINS for nullable fields could do this.
     470        self.query.promote_alias_chain(joins,
     471            self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
     472        return field, col, alias, joins, opts
     473
     474    def _final_join_removal(self, col, alias):
     475        """
     476        A helper method for get_distinct and get_ordering. This method will
     477        trim joins that are not needed from the tail of the join chain.
     478
     479        This is very similar to what is done in trim_joins, but we will
     480        trim LEFT JOINS here. It would be a good idea to consolidate this
     481        method and query.trim_joins().
     482        """
    420483        if alias:
    421             # We have to do the same "final join" optimisation as in
    422             # add_filter, since the final column might not otherwise be part of
    423             # the select set (so we can't order on it).
    424484            while 1:
    425485                join = self.query.alias_map[alias]
    426486                if col != join[RHS_JOIN_COL]:
    427487                    break
    428488                self.query.unref_alias(alias)
    429489                alias = join[LHS_ALIAS]
    430490                col = join[LHS_JOIN_COL]
    431         return [(alias, col, order)]
     491        return col, alias
    432492
    433493    def get_from_clause(self):
    434494        """
    435495        Returns a list of strings that are joined together to go after the
    436496        "FROM" part of the query, as well as a list any extra parameters that
    437497        need to be included. Sub-classes, can override this to create a
    438498        from-clause via a "select".
    439499
    440500        This should only be called after any SQL construction methods that
    441         might change the tables we need. This means the select columns and
    442         ordering must be done first.
     501        might change the tables we need. This means the select columns,
     502        ordering and distinct must be done first.
    443503        """
    444504        result = []
    445505        qn = self.quote_name_unless_alias
    446506        qn2 = self.connection.ops.quote_name
    447507        first = True
    448508        for alias in self.query.tables:
    449509            if not self.query.alias_refcount[alias]:
    450510                continue
     
    9791039class SQLAggregateCompiler(SQLCompiler):
    9801040    def as_sql(self, qn=None):
    9811041        """
    9821042        Creates the SQL for this query. Returns the SQL string and list of
    9831043        parameters.
    9841044        """
    9851045        if qn is None:
    9861046            qn = self.quote_name_unless_alias
     1047
    9871048        sql = ('SELECT %s FROM (%s) subquery' % (
    9881049            ', '.join([
    9891050                aggregate.as_sql(qn, self.connection)
    9901051                for aggregate in self.query.aggregate_select.values()
    9911052            ]),
    9921053            self.query.subquery)
    9931054        )
    9941055        params = self.query.sub_params
  • django/db/models/sql/query.py

    diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py
    a b  
    122122        self.tables = []    # Aliases in the order they are created.
    123123        self.where = where()
    124124        self.where_class = where
    125125        self.group_by = None
    126126        self.having = where()
    127127        self.order_by = []
    128128        self.low_mark, self.high_mark = 0, None  # Used for offset/limit
    129129        self.distinct = False
     130        self.distinct_fields = []
    130131        self.select_for_update = False
    131132        self.select_for_update_nowait = False
    132133        self.select_related = False
    133134        self.related_select_cols = []
    134135
    135136        # SQL aggregate-related attributes
    136137        self.aggregates = SortedDict() # Maps alias -> SQL aggregate function
    137138        self.aggregate_select_mask = None
     
    260261        if self.group_by is None:
    261262            obj.group_by = None
    262263        else:
    263264            obj.group_by = self.group_by[:]
    264265        obj.having = copy.deepcopy(self.having, memo=memo)
    265266        obj.order_by = self.order_by[:]
    266267        obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
    267268        obj.distinct = self.distinct
     269        obj.distinct_fields = self.distinct_fields[:]
    268270        obj.select_for_update = self.select_for_update
    269271        obj.select_for_update_nowait = self.select_for_update_nowait
    270272        obj.select_related = self.select_related
    271273        obj.related_select_cols = []
    272274        obj.aggregates = copy.deepcopy(self.aggregates, memo=memo)
    273275        if self.aggregate_select_mask is None:
    274276            obj.aggregate_select_mask = None
    275277        else:
     
    293295        obj.extra_tables = self.extra_tables
    294296        obj.extra_order_by = self.extra_order_by
    295297        obj.deferred_loading = copy.deepcopy(self.deferred_loading, memo=memo)
    296298        if self.filter_is_sticky and self.used_aliases:
    297299            obj.used_aliases = self.used_aliases.copy()
    298300        else:
    299301            obj.used_aliases = set()
    300302        obj.filter_is_sticky = False
     303
    301304        obj.__dict__.update(kwargs)
    302305        if hasattr(obj, '_setup_query'):
    303306            obj._setup_query()
    304307        return obj
    305308
    306309    def convert_values(self, value, field, connection):
    307310        """Convert the database-returned value into a type that is consistent
    308311        across database backends.
     
    388391            in zip(query.aggregate_select.items(), result)
    389392        ])
    390393
    391394    def get_count(self, using):
    392395        """
    393396        Performs a COUNT() query using the current filter constraints.
    394397        """
    395398        obj = self.clone()
    396         if len(self.select) > 1 or self.aggregate_select:
     399        if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields):
    397400            # If a select clause exists, then the query has already started to
    398401            # specify the columns that are to be returned.
    399402            # In this case, we need to use a subquery to evaluate the count.
    400403            from django.db.models.sql.subqueries import AggregateQuery
    401404            subquery = obj
    402405            subquery.clear_ordering(True)
    403406            subquery.clear_limits()
    404407
     
    447450        'rhs' query.
    448451        """
    449452        assert self.model == rhs.model, \
    450453                "Cannot combine queries on two different base models."
    451454        assert self.can_filter(), \
    452455                "Cannot combine queries once a slice has been taken."
    453456        assert self.distinct == rhs.distinct, \
    454457            "Cannot combine a unique query with a non-unique query."
     458        assert self.distinct_fields == rhs.distinct_fields, \
     459            "Cannot combine queries with different distinct fields."
    455460
    456461        self.remove_inherited_models()
    457462        # Work out how to relabel the rhs aliases, if necessary.
    458463        change_map = {}
    459464        used = set()
    460465        conjunction = (connector == AND)
    461466        first = True
    462467        for alias in rhs.tables:
     
    669674        self.alias_refcount[alias] = 1
    670675        self.tables.append(alias)
    671676        return alias, True
    672677
    673678    def ref_alias(self, alias):
    674679        """ Increases the reference count for this alias. """
    675680        self.alias_refcount[alias] += 1
    676681
    677     def unref_alias(self, alias):
     682    def unref_alias(self, alias, amount=1):
    678683        """ Decreases the reference count for this alias. """
    679         self.alias_refcount[alias] -= 1
     684        self.alias_refcount[alias] -= amount
    680685
    681686    def promote_alias(self, alias, unconditional=False):
    682687        """
    683688        Promotes the join type of an alias to an outer join if it's possible
    684689        for the join to contain NULL values on the left. If 'unconditional' is
    685690        False, the join is only promoted if it is nullable, otherwise it is
    686691        always promoted.
    687692
     
    700705        Walks along a chain of aliases, promoting the first nullable join and
    701706        any joins following that. If 'must_promote' is True, all the aliases in
    702707        the chain are promoted.
    703708        """
    704709        for alias in chain:
    705710            if self.promote_alias(alias, must_promote):
    706711                must_promote = True
    707712
     713    def reset_refcounts(self, to_counts):
     714        """
     715        Resets the reference counts of all aliases so that they match the
     716        values given in to_counts. Aliases missing from to_counts end up at zero.
     717        """
     718        for alias, cur_refcount in self.alias_refcount.copy().items():
     719            unref_amount = cur_refcount - to_counts.get(alias, 0)
     720            self.unref_alias(alias, unref_amount)
     721
    708722    def promote_unused_aliases(self, initial_refcounts, used_aliases):
    709723        """
    710724        Given a "before" copy of the alias_refcounts dictionary (as
    711725        'initial_refcounts') and a collection of aliases that may have been
    712726        changed or created, works out which aliases have been created since
    713727        then and which ones haven't been used and promotes all of those
    714728        aliases, plus any children of theirs in the alias tree, to outer joins.
    715729        """
     
    827841            self.ref_alias(alias)
    828842        else:
    829843            alias = self.join((None, self.model._meta.db_table, None, None))
    830844        return alias
    831845
    832846    def count_active_tables(self):
    833847        """
    834848        Returns the number of tables in this query with a non-zero reference
    835         count.
     849        count. Note that the compiler resets reference counts after building the
     850        SQL, so tables added in the compiler will not be seen by this method.
    836851        """
    837852        return len([1 for count in self.alias_refcount.itervalues() if count])
    838853
    839854    def join(self, connection, always_create=False, exclusions=(),
    840855            promote=False, outer_if_first=False, nullable=False, reuse=None):
    841856        """
    842857        Returns an alias for the join in 'connection', either reusing an
    843858        existing alias for that join or creating a new one. 'connection' is a
     
    15911606        """
    15921607        Clears the list of fields to select (but not extra_select columns).
    15931608        Some queryset types completely replace any existing list of select
    15941609        columns.
    15951610        """
    15961611        self.select = []
    15971612        self.select_fields = []
    15981613
     1614    def add_distinct_fields(self, *field_names):
     1615        """
     1616        Sets the fields of the query's "distinct on" clause and marks the query distinct.
     1617        """
     1618        self.distinct_fields = field_names
     1619        self.distinct = True
     1620
     1621
    15991622    def add_fields(self, field_names, allow_m2m=True):
    16001623        """
    16011624        Adds the given (model) fields to the select set. The field names are
    16021625        added in the order specified.
    16031626        """
    16041627        alias = self.get_initial_alias()
    16051628        opts = self.get_meta()
    16061629
  • docs/ref/models/querysets.txt

    diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
    a b  
    340340a default ordering, or when using :meth:`order_by()`). If no such ordering is
    341341defined for a given ``QuerySet``, calling ``reverse()`` on it has no real
    342342effect (the ordering was undefined prior to calling ``reverse()``, and will
    343343remain undefined afterward).
    344344
    345345distinct
    346346~~~~~~~~
    347347
    348 .. method:: distinct()
     348.. method:: distinct([*fields])
    349349
    350350Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This
    351351eliminates duplicate rows from the query results.
    352352
    353353By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this
    354354is rarely a problem, because simple queries such as ``Blog.objects.all()``
    355355don't introduce the possibility of duplicate result rows. However, if your
    356356query spans multiple tables, it's possible to get duplicate results when a
     
    369369    selected, the columns used in any :meth:`order_by()` (or default model
    370370    ordering) will still be involved and may affect uniqueness of the results.
    371371
    372372    The moral here is that if you are using ``distinct()`` be careful about
    373373    ordering by related models. Similarly, when using ``distinct()`` and
    374374    :meth:`values()` together, be careful when ordering by fields not in the
    375375    :meth:`values()` call.
    376376
     377.. versionadded:: 1.4
     378
     379The ability to pass positional arguments (``*fields``) is new in Django 1.4.
     380They are the names of the fields to which the ``DISTINCT`` should be limited. This
     381translates to a ``SELECT DISTINCT ON`` SQL query. A ``DISTINCT ON`` query eliminates
     382duplicate rows not by comparing all fields in a row, but by comparing only the given
     383fields.
     384
     385.. note::
     386    The ability to specify field names is only available in PostgreSQL.
     387
     388.. note::
     389    When using the ``DISTINCT ON`` functionality, the columns given to
     390    :meth:`distinct` must match the leftmost :meth:`order_by` columns. For example,
     391    ``SELECT DISTINCT ON (a)`` gives you the first row for each value in column
     392    ``a``. If you don't specify an order, then you'll get some arbitrary row.
     393
     394Examples::
     395
     396    >>> Author.objects.distinct()
     397    [...]
     398
     399    >>> Entry.objects.order_by('pub_date').distinct('pub_date')
     400    [...]
     401
     402    >>> Entry.objects.order_by('blog').distinct('blog')
     403    [...]
     404
     405    >>> Entry.objects.order_by('author', 'pub_date').distinct('author', 'pub_date')
     406    [...]
     407
     408    >>> Entry.objects.order_by('blog__name', 'mod_date').distinct('blog__name', 'mod_date')
     409    [...]
     410
     411    >>> Entry.objects.order_by('author', 'pub_date').distinct('author')
     412    [...]
     413
    377414values
    378415~~~~~~
    379416
    380417.. method:: values(*fields)
    381418
    382419Returns a ``ValuesQuerySet`` — a ``QuerySet`` subclass that returns
    383420dictionaries when used as an iterable, rather than model-instance objects.
    384421
  • new file tests/modeltests/distinct_on_fields/models.py

    diff --git a/tests/modeltests/distinct_on_fields/__init__.py b/tests/modeltests/distinct_on_fields/__init__.py
    new file mode 100644
    diff --git a/tests/modeltests/distinct_on_fields/models.py b/tests/modeltests/distinct_on_fields/models.py
    new file mode 100644
    - +  
     1from django.db import models
     2
     3class Tag(models.Model):
     4    name = models.CharField(max_length=10)
     5    parent = models.ForeignKey('self', blank=True, null=True,
     6            related_name='children')
     7
     8    class Meta:
     9        ordering = ['name']
     10
     11    def __unicode__(self):
     12        return self.name
     13
     14class Celebrity(models.Model):
     15    name = models.CharField("Name", max_length=20)
     16    greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
     17
     18    def __unicode__(self):
     19        return self.name
     20
     21class Fan(models.Model):
     22    fan_of = models.ForeignKey(Celebrity)
     23
     24class Staff(models.Model):
     25    id = models.IntegerField(primary_key=True)
     26    name = models.CharField(max_length=50)
     27    organisation = models.CharField(max_length=100)
     28    tags = models.ManyToManyField(Tag, through='StaffTag')
     29    coworkers = models.ManyToManyField('self')
     30
     31    def __unicode__(self):
     32        return self.name
     33
     34class StaffTag(models.Model):
     35    staff = models.ForeignKey(Staff)
     36    tag = models.ForeignKey(Tag)
     37
     38    def __unicode__(self):
     39        return u"%s -> %s" % (self.tag, self.staff)
  • new file tests/modeltests/distinct_on_fields/tests.py

    diff --git a/tests/modeltests/distinct_on_fields/tests.py b/tests/modeltests/distinct_on_fields/tests.py
    new file mode 100644
    - +  
     1from __future__ import absolute_import
     2
     3from django.db.models import Max
     4from django.test import TestCase, skipUnlessDBFeature
     5
     6from .models import Tag, Celebrity, Fan, Staff, StaffTag
     7
     8class DistinctOnTests(TestCase):
     9    def setUp(self):
     10        t1 = Tag.objects.create(name='t1')
     11        t2 = Tag.objects.create(name='t2', parent=t1)
     12        t3 = Tag.objects.create(name='t3', parent=t1)
     13        t4 = Tag.objects.create(name='t4', parent=t3)
     14        t5 = Tag.objects.create(name='t5', parent=t3)
     15
     16        p1_o1 = Staff.objects.create(id=1, name="p1", organisation="o1")
     17        p2_o1 = Staff.objects.create(id=2, name="p2", organisation="o1")
     18        p3_o1 = Staff.objects.create(id=3, name="p3", organisation="o1")
     19        p1_o2 = Staff.objects.create(id=4, name="p1", organisation="o2")
     20        p1_o1.coworkers.add(p2_o1, p3_o1)
     21        StaffTag.objects.create(staff=p1_o1, tag=t1)
     22        StaffTag.objects.create(staff=p1_o1, tag=t1)
     23
     24        celeb1 = Celebrity.objects.create(name="c1")
     25        celeb2 = Celebrity.objects.create(name="c2")
     26
     27        self.fan1 = Fan.objects.create(fan_of=celeb1)
     28        self.fan2 = Fan.objects.create(fan_of=celeb1)
     29        self.fan3 = Fan.objects.create(fan_of=celeb2)
     30
     31    @skipUnlessDBFeature('can_distinct_on_fields')
     32    def test_basic_distinct_on(self):
     33        """QuerySet.distinct('field', ...) works"""
     34        # (qset, expected) tuples
     35        qsets = (
     36            (
     37                Staff.objects.distinct().order_by('name'),
     38                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     39            ),
     40            (
     41                Staff.objects.distinct('name').order_by('name'),
     42                ['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     43            ),
     44            (
     45                Staff.objects.distinct('organisation').order_by('organisation', 'name'),
     46                ['<Staff: p1>', '<Staff: p1>'],
     47            ),
     48            (
     49                Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'),
     50                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     51            ),
     52            (
     53                Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\
     54                    distinct('name').order_by('name'),
     55                ['<Celebrity: c1>', '<Celebrity: c2>'],
     56            ),
     57            # Does combining querysets work?
     58            (
     59                (Celebrity.objects.filter(fan__in=[self.fan1, self.fan2]).\
     60                    distinct('name').order_by('name')
     61                |Celebrity.objects.filter(fan__in=[self.fan3]).\
     62                    distinct('name').order_by('name')),
     63                ['<Celebrity: c1>', '<Celebrity: c2>'],
     64            ),
     65            (
     66                StaffTag.objects.distinct('staff','tag'),
     67                ['<StaffTag: t1 -> p1>'],
     68            ),
     69            (
     70                Tag.objects.order_by('parent__pk', 'pk').distinct('parent'),
     71                ['<Tag: t2>', '<Tag: t4>', '<Tag: t1>'],
     72            ),
     73            (
     74                StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'),
     75                ['<StaffTag: t1 -> p1>'],
     76            ),
     77            # Fetch the alphabetically first coworker for each worker
     78            (
     79                (Staff.objects.distinct('id').order_by('id', 'coworkers__name').
     80                               values_list('id', 'coworkers__name')),
     81                ["(1, u'p2')", "(2, u'p1')", "(3, u'p1')", "(4, None)"]
     82            ),
     83        )
     84        for qset, expected in qsets:
     85            self.assertQuerysetEqual(qset, expected)
     86            self.assertEqual(qset.count(), len(expected))
     87
     88        # Combining queries with different distinct_fields is not allowed.
     89        base_qs = Celebrity.objects.all()
     90        self.assertRaisesMessage(
     91            AssertionError,
     92            "Cannot combine queries with different distinct fields.",
     93            lambda: (base_qs.distinct('id') & base_qs.distinct('name'))
     94        )
     95
     96        # Test join unreffing
     97        c1 = Celebrity.objects.distinct('greatest_fan__id', 'greatest_fan__fan_of')
     98        self.assertIn('OUTER JOIN', str(c1.query))
     99        c2 = c1.distinct('pk')
     100        self.assertNotIn('OUTER JOIN', str(c2.query))
     101
     102    @skipUnlessDBFeature('can_distinct_on_fields')
     103    def test_distinct_not_implemented_checks(self):
     104        # distinct + annotate not allowed
     105        with self.assertRaises(NotImplementedError):
     106            Celebrity.objects.annotate(Max('id')).distinct('id')[0]
     107        with self.assertRaises(NotImplementedError):
     108            Celebrity.objects.distinct('id').annotate(Max('id'))[0]
     109
     110        # However this check is done only when the query executes, so you
     111        # can use distinct() to remove the fields before execution.
     112        Celebrity.objects.distinct('id').annotate(Max('id')).distinct()[0]
     113        # distinct + aggregate not allowed
     114        with self.assertRaises(NotImplementedError):
     115            Celebrity.objects.distinct('id').aggregate(Max('id'))
     116
  • tests/regressiontests/queries/models.py

    diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py
    a b  
    204204
    205205# An inter-related setup with a model subclass that has a nullable
    206206# path to another model, and a return path from that model.
    207207
    208208class Celebrity(models.Model):
    209209    name = models.CharField("Name", max_length=20)
    210210    greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
    211211
     212    def __unicode__(self):
     213        return self.name
     214
    212215class TvChef(Celebrity):
    213216    pass
    214217
    215218class Fan(models.Model):
    216219    fan_of = models.ForeignKey(Celebrity)
    217220
    218221# Multiple foreign keys
    219222class LeafA(models.Model):
     
    338341        return "category item: " + str(self.category)
    339342
    340343class OneToOneCategory(models.Model):
    341344    new_name = models.CharField(max_length=15)
    342345    category = models.OneToOneField(SimpleCategory)
    343346
    344347    def __unicode__(self):
    345348        return "one2one " + self.new_name
    346 
  • tests/regressiontests/queries/tests.py

    diff --git a/tests/regressiontests/queries/tests.py b/tests/regressiontests/queries/tests.py
    a b  
    229229        q1 = Item.objects.filter(tags=self.t1)
    230230        q2 = Item.objects.filter(note=self.n3, tags=self.t2)
    231231        q3 = Item.objects.filter(creator=self.a4)
    232232        self.assertQuerysetEqual(
    233233            ((q1 & q2) | q3).order_by('name'),
    234234            ['<Item: four>', '<Item: one>']
    235235        )
    236236
    237     # FIXME: This is difficult to fix and very much an edge case, so punt for
    238     # now.  This is related to the order_by() tests for ticket #2253, but the
    239     # old bug exhibited itself here (q2 was pulling too many tables into the
    240     # combined query with the new ordering, but only because we have evaluated
    241     # q2 already).
    242     @unittest.expectedFailure
    243237    def test_order_by_tables(self):
    244238        q1 = Item.objects.order_by('name')
    245239        q2 = Item.objects.filter(id=self.i1.id)
    246240        list(q2)
    247241        self.assertEqual(len((q1 & q2).order_by('name').query.tables), 1)
    248242
     243    def test_order_by_join_unref(self):
     244        """
     245        This test is related to the above one, testing that there aren't
     246        old JOINs in the query.
     247        """
     248        qs = Celebrity.objects.order_by('greatest_fan__fan_of')
     249        self.assertIn('OUTER JOIN', str(qs.query))
     250        qs = qs.order_by('id')
     251        self.assertNotIn('OUTER JOIN', str(qs.query))
     252
    249253    def test_tickets_4088_4306(self):
    250254        self.assertQuerysetEqual(
    251255            Report.objects.filter(creator=1001),
    252256            ['<Report: r1>']
    253257        )
    254258        self.assertQuerysetEqual(
    255259            Report.objects.filter(creator__num=1001),
    256260            ['<Report: r1>']
     
    17231727
    17241728        self.assertEqual(
    17251729            list(Node.objects.filter(node=node2)),
    17261730            [node1]
    17271731        )
    17281732
    17291733
    17301734class ConditionalTests(BaseQuerysetTest):
    1731     """Tests whose execution depend on dfferent environment conditions like
     1735    """Tests whose execution depend on different environment conditions like
    17321736    Python version or DB backend features"""
    17331737
    17341738    def setUp(self):
    17351739        generic = NamedCategory.objects.create(name="Generic")
    17361740        t1 = Tag.objects.create(name='t1', category=generic)
    17371741        t2 = Tag.objects.create(name='t2', parent=t1, category=generic)
    17381742        t3 = Tag.objects.create(name='t3', parent=t1)
    17391743        t4 = Tag.objects.create(name='t4', parent=t3)
    17401744        t5 = Tag.objects.create(name='t5', parent=t3)
    17411745
     1746
    17421747    # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed
    17431748    # (Python issue 1242657), so these cases return an empty list, rather than
    17441749    # raising an exception. Not a lot we can do about that, unfortunately, due to
    17451750    # the way Python handles list() calls internally. Thus, we skip the tests for
    17461751    # Python 2.6.
    17471752    @unittest.skipIf(sys.version_info[:2] == (2, 6), "Python version is 2.6")
    17481753    def test_infinite_loop(self):
    17491754        # If you're not careful, it's possible to introduce infinite loops via
     
    17861791
    17871792    # Sqlite 3 does not support passing in more than 1000 parameters except by
    17881793    # changing a parameter at compilation time.
    17891794    @skipUnlessDBFeature('supports_1000_query_parameters')
    17901795    def test_ticket14244(self):
    17911796        # Test that the "in" lookup works with lists of 1000 items or more.
    17921797        Number.objects.all().delete()
    17931798        numbers = range(2500)
     1799        # Speed up object creation by using bulk_create. However keep the
     1800        # batches somewhat small - otherwise we might hit the parameter limit
     1801        # in bulk_create.
     1802        batch = []
    17941803        for num in numbers:
    1795             _ = Number.objects.create(num=num)
     1804            batch.append(Number(num=num))
     1805            if len(batch) == 100:
     1806                Number.objects.bulk_create(batch)
     1807                batch = []
     1808        if batch:
     1809            Number.objects.bulk_create(batch)
    17961810        self.assertEqual(
    17971811            Number.objects.filter(num__in=numbers[:1000]).count(),
    17981812            1000
    17991813        )
    18001814        self.assertEqual(
    18011815            Number.objects.filter(num__in=numbers[:1001]).count(),
    18021816            1001
    18031817        )
     
    18051819            Number.objects.filter(num__in=numbers[:2000]).count(),
    18061820            2000
    18071821        )
    18081822        self.assertEqual(
    18091823            Number.objects.filter(num__in=numbers).count(),
    18101824            2500
    18111825        )
    18121826
     1827
    18131828class UnionTests(unittest.TestCase):
    18141829    """
    18151830    Tests for the union of two querysets. Bug #12252.
    18161831    """
    18171832    def setUp(self):
    18181833        objectas = []
    18191834        objectbs = []
    18201835        objectcs = []
  • tests/regressiontests/select_related_regress/tests.py

    diff --git a/tests/regressiontests/select_related_regress/tests.py b/tests/regressiontests/select_related_regress/tests.py
    a b  
    3535        connections=Connection.objects.filter(start__device__building=b, end__device__building=b).order_by('id')
    3636        self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections],
    3737            [(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')])
    3838
    3939        connections=Connection.objects.filter(start__device__building=b, end__device__building=b).select_related().order_by('id')
    4040        self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections],
    4141            [(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')])
    4242
    43         # This final query should only join seven tables (port, device and building
    44         # twice each, plus connection once).
    45         self.assertEqual(connections.query.count_active_tables(), 7)
     43        # This final query should only have seven tables (port, device and building
     44        # twice each, plus connection once). Thus, 6 joins plus the FROM table.
     45        self.assertEqual(str(connections.query).count(" JOIN "), 6)
    4646
    4747
    4848    def test_regression_8106(self):
    4949        """
    5050        Regression test for bug #8106.
    5151
    5252        Same sort of problem as the previous test, but this time there are
    5353        more extra tables to pull in as part of the select_related() and some
Back to Top