Django

Code

Changeset 7455

Show
Ignore:
Timestamp:
04/24/08 11:07:07 (2 months ago)
Author:
mtredinnick
Message:

queryset-refactor: Changed the way order_by() and distinct() interact.

When using "select distinct" all ordering columns must be part of the output
(select) columns. We were previously just throwing away ordering columns that
weren't included, but there are some cases where they are needed and it's
difficult to add them in manually. So now the default behaviour is to append
any missing columns.

This can affect the output of distinct() if complicated order_by() constructs
are used, so the documentation has been updated with an explanation of what's
going on there.

Fixed #7070.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • django/branches/queryset-refactor/django/db/models/sql/query.py

    r7454 r7455  
    5353        self.default_ordering = True 
    5454        self.standard_ordering = True 
     55        self.ordering_aliases = [] 
    5556        self.start_meta = None 
    5657 
     
    140141        obj.default_ordering = self.default_ordering 
    141142        obj.standard_ordering = self.standard_ordering 
     143        obj.ordering_aliases = [] 
    142144        obj.start_meta = self.start_meta 
    143145        obj.select = self.select[:] 
     
    216218        out_cols = self.get_columns() 
    217219        ordering = self.get_ordering() 
     220 
    218221        # This must come after 'select' and 'ordering' -- see docstring of 
    219222        # get_from_clause() for details. 
    220223        from_, f_params = self.get_from_clause() 
     224 
    221225        where, w_params = self.where.as_sql(qn=self.quote_name_unless_alias) 
    222226        params = list(self.extra_select_params) 
     
    225229        if self.distinct: 
    226230            result.append('DISTINCT') 
    227         result.append(', '.join(out_cols)) 
     231        result.append(', '.join(out_cols + self.ordering_aliases)) 
    228232 
    229233        result.append('FROM') 
     
    466470    def get_ordering(self): 
    467471        """ 
    468         Returns a tuple representing the SQL elements in the "order by" clause. 
     472        Returns list representing the SQL elements in the "order by" clause. 
     473        Also sets the ordering_aliases attribute on this instance to a list of 
     474        extra aliases needed in the select. 
    469475 
    470476        Determining the ordering SQL can change the tables we need to include, 
    471477        so this should be run *before* get_from_clause(). 
    472478        """ 
    473         # FIXME: It's an SQL-92 requirement that all ordering columns appear as 
    474         # output columns in the query (in the select statement) or be ordinals. 
    475         # We don't enforce that here, but we should (by adding to the select 
    476         # columns), for portability. 
    477479        if self.extra_order_by: 
    478480            ordering = self.extra_order_by 
     
    486488        select_aliases = self._select_aliases 
    487489        result = [] 
     490        ordering_aliases = [] 
    488491        if self.standard_ordering: 
    489492            asc, desc = ORDER_DIR['ASC'] 
     
    516519                        self.model._meta, default_order=asc): 
    517520                    elt = '%s.%s' % (qn(table), qn2(col)) 
    518                     if not distinct or elt in select_aliases: 
    519                         result.append('%s %s' % (elt, order)) 
     521                    if distinct and elt not in select_aliases: 
     522                        ordering_aliases.append(elt) 
     523                    result.append('%s %s' % (elt, order)) 
    520524            else: 
    521525                col, order = get_order_dir(field, asc) 
    522526                elt = qn(col) 
    523                 if not distinct or elt in select_aliases: 
    524                     result.append('%s %s' % (elt, order)) 
     527                if distinct and elt not in select_aliases: 
     528                    ordering_aliases.append(elt) 
     529                result.append('%s %s' % (elt, order)) 
     530        self.ordering_aliases = ordering_aliases 
    525531        return result 
    526532 
     
    13801386            return cursor 
    13811387        if result_type == SINGLE: 
     1388            if self.ordering_aliases: 
     1389                return cursor.fetchone()[:-len(results.ordering_aliases)] 
    13821390            return cursor.fetchone() 
     1391 
    13831392        # The MULTI case. 
     1393        if self.ordering_aliases: 
     1394            return order_modified_iter(cursor, len(self.ordering_aliases), 
     1395                    self.connection.features.empty_fetchmany_value) 
    13841396        return iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)), 
    13851397                self.connection.features.empty_fetchmany_value) 
     
    14091421    yield iter([]).next() 
    14101422 
     1423def order_modified_iter(cursor, trim, sentinel): 
     1424    """ 
     1425    Yields blocks of rows from a cursor. We use this iterator in the special 
     1426    case when extra output columns have been added to support ordering 
     1427    requirements. We must trim those extra columns before anything else can use 
     1428    the results, since they're only needed to make the SQL valid. 
     1429    """ 
     1430    for rows in iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)), 
     1431            sentinel): 
     1432        yield [r[:-trim] for r in rows] 
     1433 
    14111434def setup_join_cache(sender): 
    14121435    """ 
  • django/branches/queryset-refactor/docs/db-api.txt

    r7453 r7455  
    511511...since the ``Blog`` model has no default ordering specified. 
    512512 
     513Be cautious when ordering by fields in related models if you are also using 
     514``distinct()``. See the note in the `distinct()`_ section for an explanation 
     515of how related model ordering can change the expected results. 
     516 
    513517It is permissible to specify a multi-valued field to order the results by (for 
    514518example, a ``ManyToMany`` field). Normally this won't be a sensible thing to 
     
    560564By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this 
    561565is rarely a problem, because simple queries such as ``Blog.objects.all()`` 
    562 don't introduce the possibility of duplicate result rows. 
    563  
    564 However, if your query spans multiple tables, it's possible to get duplicate 
    565 results when a ``QuerySet`` is evaluated. That's when you'd use ``distinct()``. 
     566don't introduce the possibility of duplicate result rows. However, if your 
     567query spans multiple tables, it's possible to get duplicate results when a 
     568``QuerySet`` is evaluated. That's when you'd use ``distinct()``. 
     569 
     570.. note:: 
     571    Any fields used in an ``order_by()`` call are included in the SQL 
     572    ``SELECT`` columns. This can sometimes lead to unexpected results when 
     573    used in conjuntion with ``distinct()``. If you order by fields from a 
     574    related model, those fields will be added to the selected columns and they 
     575    may make otherwise duplicate rows appear to be distinct. Since the extra 
     576    columns don't appear in the returned results (they are only there to 
     577    support ordering), it sometimes looks like non-distinct results are being 
     578    returned. 
     579 
     580    Similarly, if you use a ``values()`` query to restrict the columns 
     581    selected, the columns used in any ``order_by()`` (or default model 
     582    ordering) will still be involved and may affect uniqueness of the results. 
     583 
     584    The moral here is that if you are using ``distinct()`` be careful about 
     585    ordering by related models. Similarly, when using ``distinct()`` and 
     586    ``values()`` together, be careful when ordering by fields not in the 
     587    ``values()`` call. 
    566588 
    567589``values(*fields)`` 
     
    628650        >>> Entry.objects.values('blog_id') 
    629651        [{'blog_id': 1}, ...] 
     652    * When using ``values()`` together with ``distinct()``, be aware that 
     653      ordering can affect the results. See the note in the `distinct()`_ 
     654      section, above, for details. 
    630655 
    631656**New in Django development version:** Previously, it was not possible to pass 
     
    842867call, since they are conflicting options. 
    843868 
    844 ``extra(select=None, where=None, params=None, tables=None, order_by=None, 
    845 select_params=None)`` 
     869``extra(select=None, where=None, params=None, tables=None, order_by=None, select_params=None)`` 
    846870~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
    847871 
  • django/branches/queryset-refactor/tests/modeltests/many_to_one/models.py

    r7230 r7455  
    251251[<Reporter: John Smith>] 
    252252 
    253 # It's possible to use values() calls across many-to-one relations. 
     253# It's possible to use values() calls across many-to-one relations. (Note, too, that we clear the ordering here so as not to drag the 'headline' field into the columns being used to determine uniqueness.) 
    254254>>> d = {'reporter__first_name': u'John', 'reporter__last_name': u'Smith'} 
    255 >>> list(Article.objects.filter(reporter=r).distinct().values('reporter__first_name', 'reporter__last_name')) == [d] 
     255>>> list(Article.objects.filter(reporter=r).distinct().order_by().values('reporter__first_name', 'reporter__last_name')) == [d] 
    256256True 
    257257 
  • django/branches/queryset-refactor/tests/regressiontests/queries/models.py

    r7447 r7455  
    499499[<Item: four>] 
    500500 
    501 Bug #5321 
     501Bug #5321, #7070 
     502 
     503Ordering columns must be included in the output columns. Note that this means 
     504results that might otherwise be distinct are not (if there are multiple values 
     505in the ordering cols), as in this example. This isn't a bug; it's a warning to 
     506be careful with the selection of ordering columns. 
     507 
    502508>>> Note.objects.values('misc').distinct().order_by('note', '-misc') 
    503 [{'misc': u'foo'}, {'misc': u'bar'}
     509[{'misc': u'foo'}, {'misc': u'bar'}, {'misc': u'foo'}
    504510 
    505511Bug #4358