Code

Ticket #6422: distinct_on.14.diff

File distinct_on.14.diff, 29.1 KB (added by ramiro, 2 years ago)

Added Anssi to AUTHORS, release notes blurb, removed unrelated tests change, renamed DatabaseOperations.dictinct() to distinct_sql()

Line 
1diff -r 0c76b026c926 AUTHORS
2--- a/AUTHORS   Thu Dec 22 08:33:58 2011 +0000
3+++ b/AUTHORS   Thu Dec 22 16:53:03 2011 -0300
4@@ -203,6 +203,7 @@
5     Marc Garcia <marc.garcia@accopensys.com>
6     Andy Gayton <andy-django@thecablelounge.com>
7     geber@datacollect.com
8+    Jeffrey Gelens <jeffrey@gelens.org>
9     Baishampayan Ghose
10     Joshua Ginsberg <jag@flowtheory.net>
11     Dimitris Glezos <dimitris@glezos.com>
12@@ -269,6 +270,7 @@
13     jpellerin@gmail.com
14     junzhang.jn@gmail.com
15     Xia Kai <http://blog.xiaket.org/>
16+    Anssi Kääriäinen
17     Antti Kaihola <http://djangopeople.net/akaihola/>
18     Peter van Kampen
19     Bahadır Kandemir <bahadir@pardus.org.tr>
20diff -r 0c76b026c926 django/db/backends/__init__.py
21--- a/django/db/backends/__init__.py    Thu Dec 22 08:33:58 2011 +0000
22+++ b/django/db/backends/__init__.py    Thu Dec 22 16:53:03 2011 -0300
23@@ -406,6 +406,9 @@
24     supports_stddev = None
25     can_introspect_foreign_keys = None
26 
27+    # Support for the DISTINCT ON clause
28+    can_distinct_on_fields = False
29+
30     def __init__(self, connection):
31         self.connection = connection
32 
33@@ -559,6 +562,17 @@
34         """
35         raise NotImplementedError('Full-text search is not implemented for this database backend')
36 
37+    def distinct_sql(self, fields):
38+        """
39+        Returns an SQL DISTINCT clause which removes duplicate rows from the
40+        result set. If any fields are given, only the given fields are being
41+        checked for duplicates.
42+        """
43+        if fields:
44+            raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
45+        else:
46+            return 'DISTINCT'
47+
48     def last_executed_query(self, cursor, sql, params):
49         """
50         Returns a string of the query last executed by the given cursor, with
51diff -r 0c76b026c926 django/db/backends/postgresql_psycopg2/base.py
52--- a/django/db/backends/postgresql_psycopg2/base.py    Thu Dec 22 08:33:58 2011 +0000
53+++ b/django/db/backends/postgresql_psycopg2/base.py    Thu Dec 22 16:53:03 2011 -0300
54@@ -82,6 +82,7 @@
55     has_select_for_update_nowait = True
56     has_bulk_insert = True
57     supports_tablespaces = True
58+    can_distinct_on_fields = True
59 
60 class DatabaseWrapper(BaseDatabaseWrapper):
61     vendor = 'postgresql'
62diff -r 0c76b026c926 django/db/backends/postgresql_psycopg2/operations.py
63--- a/django/db/backends/postgresql_psycopg2/operations.py      Thu Dec 22 08:33:58 2011 +0000
64+++ b/django/db/backends/postgresql_psycopg2/operations.py      Thu Dec 22 16:53:03 2011 -0300
65@@ -179,6 +179,12 @@
66 
67         return 63
68 
69+    def distinct_sql(self, fields):
70+        if fields:
71+            return 'DISTINCT ON (%s)' % ', '.join(fields)
72+        else:
73+            return 'DISTINCT'
74+
75     def last_executed_query(self, cursor, sql, params):
76         # http://initd.org/psycopg/docs/cursor.html#cursor.query
77         # The query attribute is a Psycopg extension to the DB API 2.0.
78diff -r 0c76b026c926 django/db/models/query.py
79--- a/django/db/models/query.py Thu Dec 22 08:33:58 2011 +0000
80+++ b/django/db/models/query.py Thu Dec 22 16:53:03 2011 -0300
81@@ -323,6 +323,8 @@
82         If args is present the expression is passed as a kwarg using
83         the Aggregate object's default alias.
84         """
85+        if self.query.distinct_fields:
86+            raise NotImplementedError("aggregate() + distinct(fields) not implemented.")
87         for arg in args:
88             kwargs[arg.default_alias] = arg
89 
90@@ -751,12 +753,14 @@
91         obj.query.add_ordering(*field_names)
92         return obj
93 
94-    def distinct(self, true_or_false=True):
95+    def distinct(self, *field_names):
96         """
97         Returns a new QuerySet instance that will select only distinct results.
98         """
99+        assert self.query.can_filter(), \
100+                "Cannot create distinct fields once a slice has been taken."
101         obj = self._clone()
102-        obj.query.distinct = true_or_false
103+        obj.query.add_distinct_fields(*field_names)
104         return obj
105 
106     def extra(self, select=None, where=None, params=None, tables=None,
107@@ -1179,7 +1183,7 @@
108         """
109         return self
110 
111-    def distinct(self, true_or_false=True):
112+    def distinct(self, fields=None):
113         """
114         Always returns EmptyQuerySet.
115         """
116diff -r 0c76b026c926 django/db/models/sql/compiler.py
117--- a/django/db/models/sql/compiler.py  Thu Dec 22 08:33:58 2011 +0000
118+++ b/django/db/models/sql/compiler.py  Thu Dec 22 16:53:03 2011 -0300
119@@ -23,6 +23,8 @@
120         Does any necessary class setup immediately prior to producing SQL. This
121         is for things that can't necessarily be done in __init__ because we
122         might not have all the pieces in place at that time.
123+        # TODO: after the query has been executed, the altered state should be
124+        # cleaned. We are not using a clone() of the query here.
125         """
126         if not self.query.tables:
127             self.query.join((None, self.query.model._meta.db_table, None, None))
128@@ -60,11 +62,19 @@
129             return '', ()
130 
131         self.pre_sql_setup()
132+        # After executing the query, we must get rid of any joins the query
133+        # setup created. So, take note of alias counts before the query ran.
134+        # However we do not want to get rid of stuff done in pre_sql_setup(),
135+        # as the pre_sql_setup will modify query state in a way that forbids
136+        # another run of it.
137+        self.refcounts_before = self.query.alias_refcount.copy()
138         out_cols = self.get_columns(with_col_aliases)
139         ordering, ordering_group_by = self.get_ordering()
140 
141-        # This must come after 'select' and 'ordering' -- see docstring of
142-        # get_from_clause() for details.
143+        distinct_fields = self.get_distinct()
144+
145+        # This must come after 'select', 'ordering' and 'distinct' -- see
146+        # docstring of get_from_clause() for details.
147         from_, f_params = self.get_from_clause()
148 
149         qn = self.quote_name_unless_alias
150@@ -76,8 +86,10 @@
151             params.extend(val[1])
152 
153         result = ['SELECT']
154+
155         if self.query.distinct:
156-            result.append('DISTINCT')
157+            result.append(self.connection.ops.distinct_sql(distinct_fields))
158+
159         result.append(', '.join(out_cols + self.query.ordering_aliases))
160 
161         result.append('FROM')
162@@ -90,6 +102,9 @@
163 
164         grouping, gb_params = self.get_grouping()
165         if grouping:
166+            if distinct_fields:
167+                raise NotImplementedError(
168+                    "annotate() + distinct(fields) not implemented.")
169             if ordering:
170                 # If the backend can't group by PK (i.e., any database
171                 # other than MySQL), then any fields mentioned in the
172@@ -129,6 +144,9 @@
173                 raise DatabaseError('NOWAIT is not supported on this database backend.')
174             result.append(self.connection.ops.for_update_sql(nowait=nowait))
175 
176+        # Finally do cleanup - get rid of the joins we created above.
177+        self.query.reset_refcounts(self.refcounts_before)
178+
179         return ' '.join(result), tuple(params)
180 
181     def as_nested_sql(self):
182@@ -292,6 +310,26 @@
183                     col_aliases.add(field.column)
184         return result, aliases
185 
186+    def get_distinct(self):
187+        """
188+        Returns a quoted list of fields to use in DISTINCT ON part of the query.
189+
190+        Note that this method can alter the tables in the query, and thus this
191+        must be called before get_from_clause().
192+        """
193+        qn = self.quote_name_unless_alias
194+        qn2 = self.connection.ops.quote_name
195+        result = []
196+        opts = self.query.model._meta
197+
198+        for name in self.query.distinct_fields:
199+            parts = name.split(LOOKUP_SEP)
200+            field, col, alias, _, _ = self._setup_joins(parts, opts, None)
201+            col, alias = self._final_join_removal(col, alias)
202+            result.append("%s.%s" % (qn(alias), qn2(col)))
203+        return result
204+
205+
206     def get_ordering(self):
207         """
208         Returns a tuple containing a list representing the SQL elements in the
209@@ -384,21 +422,7 @@
210         """
211         name, order = get_order_dir(name, default_order)
212         pieces = name.split(LOOKUP_SEP)
213-        if not alias:
214-            alias = self.query.get_initial_alias()
215-        field, target, opts, joins, last, extra = self.query.setup_joins(pieces,
216-                opts, alias, False)
217-        alias = joins[-1]
218-        col = target.column
219-        if not field.rel:
220-            # To avoid inadvertent trimming of a necessary alias, use the
221-            # refcount to show that we are referencing a non-relation field on
222-            # the model.
223-            self.query.ref_alias(alias)
224-
225-        # Must use left outer joins for nullable fields and their relations.
226-        self.query.promote_alias_chain(joins,
227-            self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
228+        field, col, alias, joins, opts = self._setup_joins(pieces, opts, alias)
229 
230         # If we get to this point and the field is a relation to another model,
231         # append the default ordering for that model.
232@@ -416,11 +440,47 @@
233                 results.extend(self.find_ordering_name(item, opts, alias,
234                         order, already_seen))
235             return results
236+        col, alias = self._final_join_removal(col, alias)
237+        return [(alias, col, order)]
238 
239+    def _setup_joins(self, pieces, opts, alias):
240+        """
241+        A helper method for get_ordering and get_distinct. This method will
242+        call query.setup_joins, handle refcounts and then promote the joins.
243+
244+        Note that get_ordering and get_distinct must produce same target
245+        columns on same input, as the prefixes of get_ordering and get_distinct
246+        must match. Executing SQL where this is not true is an error.
247+        """
248+        if not alias:
249+            alias = self.query.get_initial_alias()
250+        field, target, opts, joins, _, _ = self.query.setup_joins(pieces,
251+                opts, alias, False)
252+        alias = joins[-1]
253+        col = target.column
254+        if not field.rel:
255+            # To avoid inadvertent trimming of a necessary alias, use the
256+            # refcount to show that we are referencing a non-relation field on
257+            # the model.
258+            self.query.ref_alias(alias)
259+
260+        # Must use left outer joins for nullable fields and their relations.
261+        # Ordering or distinct must not affect the returned set, and INNER
262+        # JOINS for nullable fields could do this.
263+        self.query.promote_alias_chain(joins,
264+            self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
265+        return field, col, alias, joins, opts
266+
267+    def _final_join_removal(self, col, alias):
268+        """
269+        A helper method for get_distinct and get_ordering. This method will
270+        trim extra not-needed joins from the tail of the join chain.
271+
272+        This is very similar to what is done in trim_joins, but we will
273+        trim LEFT JOINS here. It would be a good idea to consolidate this
274+        method and query.trim_joins().
275+        """
276         if alias:
277-            # We have to do the same "final join" optimisation as in
278-            # add_filter, since the final column might not otherwise be part of
279-            # the select set (so we can't order on it).
280             while 1:
281                 join = self.query.alias_map[alias]
282                 if col != join[RHS_JOIN_COL]:
283@@ -428,7 +488,7 @@
284                 self.query.unref_alias(alias)
285                 alias = join[LHS_ALIAS]
286                 col = join[LHS_JOIN_COL]
287-        return [(alias, col, order)]
288+        return col, alias
289 
290     def get_from_clause(self):
291         """
292@@ -438,8 +498,8 @@
293         from-clause via a "select".
294 
295         This should only be called after any SQL construction methods that
296-        might change the tables we need. This means the select columns and
297-        ordering must be done first.
298+        might change the tables we need. This means the select columns,
299+        ordering and distinct must be done first.
300         """
301         result = []
302         qn = self.quote_name_unless_alias
303@@ -984,6 +1044,7 @@
304         """
305         if qn is None:
306             qn = self.quote_name_unless_alias
307+
308         sql = ('SELECT %s FROM (%s) subquery' % (
309             ', '.join([
310                 aggregate.as_sql(qn, self.connection)
311diff -r 0c76b026c926 django/db/models/sql/query.py
312--- a/django/db/models/sql/query.py     Thu Dec 22 08:33:58 2011 +0000
313+++ b/django/db/models/sql/query.py     Thu Dec 22 16:53:03 2011 -0300
314@@ -127,6 +127,7 @@
315         self.order_by = []
316         self.low_mark, self.high_mark = 0, None  # Used for offset/limit
317         self.distinct = False
318+        self.distinct_fields = []
319         self.select_for_update = False
320         self.select_for_update_nowait = False
321         self.select_related = False
322@@ -265,6 +266,7 @@
323         obj.order_by = self.order_by[:]
324         obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
325         obj.distinct = self.distinct
326+        obj.distinct_fields = self.distinct_fields[:]
327         obj.select_for_update = self.select_for_update
328         obj.select_for_update_nowait = self.select_for_update_nowait
329         obj.select_related = self.select_related
330@@ -298,6 +300,7 @@
331         else:
332             obj.used_aliases = set()
333         obj.filter_is_sticky = False
334+
335         obj.__dict__.update(kwargs)
336         if hasattr(obj, '_setup_query'):
337             obj._setup_query()
338@@ -393,7 +396,7 @@
339         Performs a COUNT() query using the current filter constraints.
340         """
341         obj = self.clone()
342-        if len(self.select) > 1 or self.aggregate_select:
343+        if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields):
344             # If a select clause exists, then the query has already started to
345             # specify the columns that are to be returned.
346             # In this case, we need to use a subquery to evaluate the count.
347@@ -452,6 +455,8 @@
348                 "Cannot combine queries once a slice has been taken."
349         assert self.distinct == rhs.distinct, \
350             "Cannot combine a unique query with a non-unique query."
351+        assert self.distinct_fields == rhs.distinct_fields, \
352+            "Cannot combine queries with different distinct fields."
353 
354         self.remove_inherited_models()
355         # Work out how to relabel the rhs aliases, if necessary.
356@@ -674,9 +679,9 @@
357         """ Increases the reference count for this alias. """
358         self.alias_refcount[alias] += 1
359 
360-    def unref_alias(self, alias):
361+    def unref_alias(self, alias, amount=1):
362         """ Decreases the reference count for this alias. """
363-        self.alias_refcount[alias] -= 1
364+        self.alias_refcount[alias] -= amount
365 
366     def promote_alias(self, alias, unconditional=False):
367         """
368@@ -705,6 +710,15 @@
369             if self.promote_alias(alias, must_promote):
370                 must_promote = True
371 
372+    def reset_refcounts(self, to_counts):
373+        """
374+        This method will reset reference counts for aliases so that they match
375+        the value passed in :param to_counts:.
376+        """
377+        for alias, cur_refcount in self.alias_refcount.copy().items():
378+            unref_amount = cur_refcount - to_counts.get(alias, 0)
379+            self.unref_alias(alias, unref_amount)
380+
381     def promote_unused_aliases(self, initial_refcounts, used_aliases):
382         """
383         Given a "before" copy of the alias_refcounts dictionary (as
384@@ -832,7 +846,8 @@
385     def count_active_tables(self):
386         """
387         Returns the number of tables in this query with a non-zero reference
388-        count.
389+        count. Note that after execution, the reference counts are zeroed, so
390+        tables added in compiler will not be seen by this method.
391         """
392         return len([1 for count in self.alias_refcount.itervalues() if count])
393 
394@@ -1596,6 +1611,13 @@
395         self.select = []
396         self.select_fields = []
397 
398+    def add_distinct_fields(self, *field_names):
399+        """
400+        Adds and resolves the given fields to the query's "distinct on" clause.
401+        """
402+        self.distinct_fields = field_names
403+        self.distinct = True
404+
405     def add_fields(self, field_names, allow_m2m=True):
406         """
407         Adds the given (model) fields to the select set. The field names are
408diff -r 0c76b026c926 docs/ref/models/querysets.txt
409--- a/docs/ref/models/querysets.txt     Thu Dec 22 08:33:58 2011 +0000
410+++ b/docs/ref/models/querysets.txt     Thu Dec 22 16:53:03 2011 -0300
411@@ -345,7 +345,7 @@
412 distinct
413 ~~~~~~~~
414 
415-.. method:: distinct()
416+.. method:: distinct([*fields])
417 
418 Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This
419 eliminates duplicate rows from the query results.
420@@ -374,6 +374,43 @@
421     :meth:`values()` together, be careful when ordering by fields not in the
422     :meth:`values()` call.
423 
424+.. versionadded:: 1.4
425+
426+The possibility to pass positional arguments (``*fields``) is new in Django 1.4.
427+They are names of fields to which the ``DISTINCT`` should be limited. This
428+translates to a ``SELECT DISTINCT ON`` SQL query. A ``DISTINCT ON`` query eliminates
429+duplicate rows not by comparing all fields in a row, but by comparing only the given
430+fields.
431+
432+.. note::
433+    Note that the ability to specify field names is only available in PostgreSQL.
434+
435+.. note::
436+    When using the ``DISTINCT ON`` functionality it is required that the columns given
437+    to :meth:`distinct` match the first :meth:`order_by` columns. For example ``SELECT
438+    DISTINCT ON (a)`` gives you the first row for each value in column ``a``. If you
439+    don't specify an order, then you'll get some arbitrary row.
440+
441+Examples::
442+
443+    >>> Author.objects.distinct()
444+    [...]
445+
446+    >>> Entry.objects.order_by('pub_date').distinct('pub_date')
447+    [...]
448+
449+    >>> Entry.objects.order_by('blog').distinct('blog')
450+    [...]
451+
452+    >>> Entry.objects.order_by('author', 'pub_date').distinct('author', 'pub_date')
453+    [...]
454+
455+    >>> Entry.objects.order_by('blog__name', 'mod_date').distinct('blog__name', 'mod_date')
456+    [...]
457+
458+    >>> Entry.objects.order_by('author', 'pub_date').distinct('author')
459+    [...]
460+
461 values
462 ~~~~~~
463 
464diff -r 0c76b026c926 docs/releases/1.4.txt
465--- a/docs/releases/1.4.txt     Thu Dec 22 08:33:58 2011 +0000
466+++ b/docs/releases/1.4.txt     Thu Dec 22 16:53:03 2011 -0300
467@@ -517,6 +517,16 @@
468   ``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other
469   cache backends.
470 
471+* Support in the ORM for generating ``SELECT`` queries containing ``DISTINCT ON``
472+
473+  The ``distinct()`` ``Queryset`` method now accepts an optional list of model
474+  field names. If specified, then the ``DISTINCT`` statement is limited to these
475+  fields.  The PostgreSQL is the only of the database backends shipped with
476+  Django that supports this new functionality.
477+
478+  For more details, see the documentation for
479+  :meth:`~django.db.models.query.QuerySet.distinct`.
480+
481 .. _backwards-incompatible-changes-1.4:
482 
483 Backwards incompatible changes in 1.4
484diff -r 0c76b026c926 tests/modeltests/distinct_on_fields/__init__.py
485--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
486+++ b/tests/modeltests/distinct_on_fields/__init__.py   Thu Dec 22 16:53:03 2011 -0300
487@@ -0,0 +1,1 @@
488+#
489diff -r 0c76b026c926 tests/modeltests/distinct_on_fields/models.py
490--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
491+++ b/tests/modeltests/distinct_on_fields/models.py     Thu Dec 22 16:53:03 2011 -0300
492@@ -0,0 +1,39 @@
493+from django.db import models
494+
495+class Tag(models.Model):
496+    name = models.CharField(max_length=10)
497+    parent = models.ForeignKey('self', blank=True, null=True,
498+            related_name='children')
499+
500+    class Meta:
501+        ordering = ['name']
502+
503+    def __unicode__(self):
504+        return self.name
505+
506+class Celebrity(models.Model):
507+    name = models.CharField("Name", max_length=20)
508+    greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
509+
510+    def __unicode__(self):
511+        return self.name
512+
513+class Fan(models.Model):
514+    fan_of = models.ForeignKey(Celebrity)
515+
516+class Staff(models.Model):
517+    id = models.IntegerField(primary_key=True)
518+    name = models.CharField(max_length=50)
519+    organisation = models.CharField(max_length=100)
520+    tags = models.ManyToManyField(Tag, through='StaffTag')
521+    coworkers = models.ManyToManyField('self')
522+
523+    def __unicode__(self):
524+        return self.name
525+
526+class StaffTag(models.Model):
527+    staff = models.ForeignKey(Staff)
528+    tag = models.ForeignKey(Tag)
529+
530+    def __unicode__(self):
531+        return u"%s -> %s" % (self.tag, self.staff)
532diff -r 0c76b026c926 tests/modeltests/distinct_on_fields/tests.py
533--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
534+++ b/tests/modeltests/distinct_on_fields/tests.py      Thu Dec 22 16:53:03 2011 -0300
535@@ -0,0 +1,116 @@
536+from __future__ import absolute_import, with_statement
537+
538+from django.db.models import Max
539+from django.test import TestCase, skipUnlessDBFeature
540+
541+from .models import Tag, Celebrity, Fan, Staff, StaffTag
542+
543+class DistinctOnTests(TestCase):
544+    def setUp(self):
545+        t1 = Tag.objects.create(name='t1')
546+        t2 = Tag.objects.create(name='t2', parent=t1)
547+        t3 = Tag.objects.create(name='t3', parent=t1)
548+        t4 = Tag.objects.create(name='t4', parent=t3)
549+        t5 = Tag.objects.create(name='t5', parent=t3)
550+
551+        p1_o1 = Staff.objects.create(id=1, name="p1", organisation="o1")
552+        p2_o1 = Staff.objects.create(id=2, name="p2", organisation="o1")
553+        p3_o1 = Staff.objects.create(id=3, name="p3", organisation="o1")
554+        p1_o2 = Staff.objects.create(id=4, name="p1", organisation="o2")
555+        p1_o1.coworkers.add(p2_o1, p3_o1)
556+        StaffTag.objects.create(staff=p1_o1, tag=t1)
557+        StaffTag.objects.create(staff=p1_o1, tag=t1)
558+
559+        celeb1 = Celebrity.objects.create(name="c1")
560+        celeb2 = Celebrity.objects.create(name="c2")
561+
562+        self.fan1 = Fan.objects.create(fan_of=celeb1)
563+        self.fan2 = Fan.objects.create(fan_of=celeb1)
564+        self.fan3 = Fan.objects.create(fan_of=celeb2)
565+
566+    @skipUnlessDBFeature('can_distinct_on_fields')
567+    def test_basic_distinct_on(self):
568+        """QuerySet.distinct('field', ...) works"""
569+        # (qset, expected) tuples
570+        qsets = (
571+            (
572+                Staff.objects.distinct().order_by('name'),
573+                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
574+            ),
575+            (
576+                Staff.objects.distinct('name').order_by('name'),
577+                ['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
578+            ),
579+            (
580+                Staff.objects.distinct('organisation').order_by('organisation', 'name'),
581+                ['<Staff: p1>', '<Staff: p1>'],
582+            ),
583+            (
584+                Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'),
585+                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
586+            ),
587+            (
588+                Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\
589+                    distinct('name').order_by('name'),
590+                ['<Celebrity: c1>', '<Celebrity: c2>'],
591+            ),
592+            # Does combining querysets work?
593+            (
594+                (Celebrity.objects.filter(fan__in=[self.fan1, self.fan2]).\
595+                    distinct('name').order_by('name')
596+                |Celebrity.objects.filter(fan__in=[self.fan3]).\
597+                    distinct('name').order_by('name')),
598+                ['<Celebrity: c1>', '<Celebrity: c2>'],
599+            ),
600+            (
601+                StaffTag.objects.distinct('staff','tag'),
602+                ['<StaffTag: t1 -> p1>'],
603+            ),
604+            (
605+                Tag.objects.order_by('parent__pk', 'pk').distinct('parent'),
606+                ['<Tag: t2>', '<Tag: t4>', '<Tag: t1>'],
607+            ),
608+            (
609+                StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'),
610+                ['<StaffTag: t1 -> p1>'],
611+            ),
612+            # Fetch the alphabetically first coworker for each worker
613+            (
614+                (Staff.objects.distinct('id').order_by('id', 'coworkers__name').
615+                               values_list('id', 'coworkers__name')),
616+                ["(1, u'p2')", "(2, u'p1')", "(3, u'p1')", "(4, None)"]
617+            ),
618+        )
619+        for qset, expected in qsets:
620+            self.assertQuerysetEqual(qset, expected)
621+            self.assertEqual(qset.count(), len(expected))
622+
623+        # Combining queries with different distinct_fields is not allowed.
624+        base_qs = Celebrity.objects.all()
625+        self.assertRaisesMessage(
626+            AssertionError,
627+            "Cannot combine queries with different distinct fields.",
628+            lambda: (base_qs.distinct('id') & base_qs.distinct('name'))
629+        )
630+
631+        # Test join unreffing
632+        c1 = Celebrity.objects.distinct('greatest_fan__id', 'greatest_fan__fan_of')
633+        self.assertIn('OUTER JOIN', str(c1.query))
634+        c2 = c1.distinct('pk')
635+        self.assertNotIn('OUTER JOIN', str(c2.query))
636+
637+    @skipUnlessDBFeature('can_distinct_on_fields')
638+    def test_distinct_not_implemented_checks(self):
639+        # distinct + annotate not allowed
640+        with self.assertRaises(NotImplementedError):
641+            Celebrity.objects.annotate(Max('id')).distinct('id')[0]
642+        with self.assertRaises(NotImplementedError):
643+            Celebrity.objects.distinct('id').annotate(Max('id'))[0]
644+
645+        # However this check is done only when the query executes, so you
646+        # can use distinct() to remove the fields before execution.
647+        Celebrity.objects.distinct('id').annotate(Max('id')).distinct()[0]
648+        # distinct + aggregate not allowed
649+        with self.assertRaises(NotImplementedError):
650+            Celebrity.objects.distinct('id').aggregate(Max('id'))
651+
652diff -r 0c76b026c926 tests/regressiontests/queries/models.py
653--- a/tests/regressiontests/queries/models.py   Thu Dec 22 08:33:58 2011 +0000
654+++ b/tests/regressiontests/queries/models.py   Thu Dec 22 16:53:03 2011 -0300
655@@ -209,6 +209,9 @@
656     name = models.CharField("Name", max_length=20)
657     greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
658 
659+    def __unicode__(self):
660+        return self.name
661+
662 class TvChef(Celebrity):
663     pass
664 
665@@ -343,4 +346,3 @@
666 
667     def __unicode__(self):
668         return "one2one " + self.new_name
669-
670diff -r 0c76b026c926 tests/regressiontests/queries/tests.py
671--- a/tests/regressiontests/queries/tests.py    Thu Dec 22 08:33:58 2011 +0000
672+++ b/tests/regressiontests/queries/tests.py    Thu Dec 22 16:53:03 2011 -0300
673@@ -234,18 +234,22 @@
674             ['<Item: four>', '<Item: one>']
675         )
676 
677-    # FIXME: This is difficult to fix and very much an edge case, so punt for
678-    # now.  This is related to the order_by() tests for ticket #2253, but the
679-    # old bug exhibited itself here (q2 was pulling too many tables into the
680-    # combined query with the new ordering, but only because we have evaluated
681-    # q2 already).
682-    @unittest.expectedFailure
683     def test_order_by_tables(self):
684         q1 = Item.objects.order_by('name')
685         q2 = Item.objects.filter(id=self.i1.id)
686         list(q2)
687         self.assertEqual(len((q1 & q2).order_by('name').query.tables), 1)
688 
689+    def test_order_by_join_unref(self):
690+        """
691+        This test is related to the above one, testing that there aren't
692+        old JOINs in the query.
693+        """
694+        qs = Celebrity.objects.order_by('greatest_fan__fan_of')
695+        self.assertIn('OUTER JOIN', str(qs.query))
696+        qs = qs.order_by('id')
697+        self.assertNotIn('OUTER JOIN', str(qs.query))
698+
699     def test_tickets_4088_4306(self):
700         self.assertQuerysetEqual(
701             Report.objects.filter(creator=1001),
702@@ -1728,7 +1732,7 @@
703 
704 
705 class ConditionalTests(BaseQuerysetTest):
706-    """Tests whose execution depend on dfferent environment conditions like
707+    """Tests whose execution depend on different environment conditions like
708     Python version or DB backend features"""
709 
710     def setUp(self):
711@@ -1739,6 +1743,7 @@
712         t4 = Tag.objects.create(name='t4', parent=t3)
713         t5 = Tag.objects.create(name='t5', parent=t3)
714 
715+
716     # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed
717     # (Python issue 1242657), so these cases return an empty list, rather than
718     # raising an exception. Not a lot we can do about that, unfortunately, due to
719@@ -1810,6 +1815,7 @@
720             2500
721         )
722 
723+
724 class UnionTests(unittest.TestCase):
725     """
726     Tests for the union of two querysets. Bug #12252.
727diff -r 0c76b026c926 tests/regressiontests/select_related_regress/tests.py
728--- a/tests/regressiontests/select_related_regress/tests.py     Thu Dec 22 08:33:58 2011 +0000
729+++ b/tests/regressiontests/select_related_regress/tests.py     Thu Dec 22 16:53:03 2011 -0300
730@@ -40,9 +40,9 @@
731         self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections],
732             [(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')])
733 
734-        # This final query should only join seven tables (port, device and building
735-        # twice each, plus connection once).
736-        self.assertEqual(connections.query.count_active_tables(), 7)
737+        # This final query should only have seven tables (port, device and building
738+        # twice each, plus connection once). Thus, 6 joins plus the FROM table.
739+        self.assertEqual(str(connections.query).count(" JOIN "), 6)
740 
741 
742     def test_regression_8106(self):