Context Navigation

Back to Ticket #3566

Ticket #3566: aggregates.patch

File aggregates.patch, 64.5 KB (added by Nicolas Lara, 16 years ago)
Removed a development comment. Sorry for the noise.

django/db/aggregates.py

+"""
+Classes to represent the default aggregate functions
+"""
+from django.db.models.sql.constants import LOOKUP_SEP
+from django.core.exceptions import FieldError
+def interpolate(templateStr, **kws):
+    """
+    Returns templateStr with its ``${name}`` placeholders replaced by the
+    matching keyword arguments.
+
+    Substitution is done with string.Template.substitute, so a placeholder
+    without a matching keyword raises KeyError.
+    """
+    from string import Template
+    return Template(templateStr).substitute(kws)
+class Aggregate(object):
+    """
+    Default Aggregate.
+
+    Base class for the aggregate functions. The SQL function name (func)
+    is the upper-cased class name, so a subclass only has to pick its name.
+    """
+    def __init__(self, lookup):
+        # SQL function name derived from the class, e.g. Max -> 'MAX'.
+        self.func = self.__class__.__name__.upper()
+        self.lookup = lookup
+        # Last LOOKUP_SEP-separated component of the lookup.
+        self.field_name = self.lookup.split(LOOKUP_SEP)[-1]
+        # Default result key: '<lookup>__<lower-cased class name>'.
+        self.aliased_name = '%s__%s' % (self.lookup,
+                                        self.__class__.__name__.lower())
+        # string.Template source rendered by as_sql().
+        self.sql_template = '${func}(${field})'
+    def relabel_aliases(self, change_map):
+        # Replaces col_alias with its mapped value when change_map has one.
+        # NOTE(review): col_alias is not set in __init__; in this patch it is
+        # assigned by Query.add_aggregate, so calling this before that
+        # happens raises AttributeError.
+        if self.col_alias in change_map:
+            self.col_alias = change_map[self.col_alias]
+    def as_fold(self, quote_func=None):
+        # Renders 'FUNC(lookup)' without table qualification. Raises
+        # FieldError when the lookup has more than one component.
+        # quote_func is accepted but not used here.
+        if self.lookup != self.field_name:
+            raise FieldError('Joins are not allowed here.')
+        #check to raise other exceptions
+        return '%s(%s)' % (self.func, self.lookup)
+    def as_sql(self, quote_func=None):
+        # Renders sql_template with func and a '<col_alias>.<column>' field,
+        # each part passed through quote_func (identity when not given).
+        # Reads self.col_alias and self.column, which are assigned outside
+        # this class (see the note on relabel_aliases).
+        if not quote_func:
+            quote_func = lambda x: x
+        return interpolate(self.sql_template,
+                           func=self.func.upper(),
+                           field='.'.join([quote_func(self.col_alias),
+                                           quote_func(self.column)]))
+# The four classes below add no behaviour of their own: Aggregate.__init__
+# upper-cases the class name to obtain the SQL function name
+# ('MAX', 'MIN', 'AVG', 'SUM').
+class Max(Aggregate):
+    pass
+class Min(Aggregate):
+    pass
+class Avg(Aggregate):
+    pass
+class Sum(Aggregate):
+    pass
+class Count(Aggregate):
+    """
+    COUNT aggregate. With distinct=True the rendered SQL becomes
+    'COUNT(DISTINCT <field>)'.
+    """
+    def __init__(self, lookup, distinct=False):
+        # Turn the flag into the literal SQL prefix (or an empty string).
+        if distinct:
+            distinct = 'DISTINCT '
+        else:
+            distinct = ''
+        super(Count, self).__init__(lookup)
+        # Overrides the template set by Aggregate.__init__; the %s slot is
+        # filled now, the ${...} placeholders are filled later by as_sql().
+        self.sql_template = '${func}(%s${field})' % distinct

django/db/models/sql/query.py

 from django.db import connection
 from django.db.models import signals
 from django.db.models.fields import FieldDoesNotExist
 from django.db.models.query_utils import select_related_descend
+from django.db.models.query_utils import select_related_descend, _value_or_object
 from django.db.models.sql.where import WhereNode, EverythingNode, AND, OR
 from django.db.models.sql.datastructures import Count
 from django.core.exceptions import FieldError
 …
         self.start_meta = None
         self.select_fields = []
         self.related_select_fields = []
+        self.allow_nulls = False
         self.dupe_avoidance = {}
         self.used_aliases = set()
         self.filter_is_sticky = False
 …
         obj.standard_ordering = self.standard_ordering
         obj.ordering_aliases = []
         obj.start_meta = self.start_meta
+        obj.allow_nulls = self.allow_nulls
         obj.select_fields = self.select_fields[:]
         obj.related_select_fields = self.related_select_fields[:]
         obj.dupe_avoidance = self.dupe_avoidance.copy()
 …
                     row = self.resolve_columns(row, fields)
                 yield row
+    def get_aggregation(self):
+        """
+        Returns the dictionary with the values of the existing aggregations.
+
+        Keys are the aliased_name of each aggregate; values are the columns
+        of the single row returned by execute_sql(SINGLE), each passed
+        through _value_or_object. Returns {} when nothing is selected.
+        """
+        if not self.select:
+            return {}
+        #If there is a group by clause aggregating does not add useful
+        #information but retrieves only the first row. Aggregating
+        #over the subquery instead.
+        if self.group_by:
+            # Function-level import; presumably avoids a circular import
+            # with the subqueries module -- TODO confirm.
+            from subqueries import AggregateQuery
+            obj = self.clone()
+            external = []
+            # Snapshot the (index, item) pairs so that deleting from
+            # obj.select below does not disturb the iteration.
+            select = [i for i in enumerate(obj.select)]
+            deleted = 0
+            for (i, field) in select:
+                # Items with a truthy `reduce` attribute are moved out of
+                # the clone and evaluated by the outer AggregateQuery.
+                if hasattr(field, 'reduce') and field.reduce:
+                    external.append(field)
+                    # i indexes the snapshot; shift by the removals so far.
+                    del obj.select[i-deleted]
+                    deleted += 1
+            query = AggregateQuery(self.model, self.connection)
+            query.add_select(external)
+            query.add_subquery(obj)
+            data = [_value_or_object(x) for x in query.execute_sql(SINGLE)]
+            result = dict(zip([i.aliased_name for i in query.select], data))
+            return result
+        # Ungrouped path. NOTE(review): this overwrites select and
+        # extra_select on self rather than on a clone -- confirm intended.
+        self.select = self.get_aggregate_list()
+        self.extra_select = {}
+        data = [_value_or_object(x) for x in self.execute_sql(SINGLE)]
+        result = dict(zip([i.aliased_name for i in self.select], data))
+        return result
     def get_count(self):
         """
         Performs a COUNT() query using the current filter constraints.
 …
             result.append(' AND '.join(self.extra_where))
         if self.group_by:
+            grouping = self.get_grouping()
+            result.append('GROUP BY %s' % ', '.join(grouping))
+            result.append('GROUP BY %s' % ', '.join(self.get_grouping()))
+        having = []
+        having_params = []
         if self.having:
+            having, h_params = self.get_having()
+            result.append('HAVING %s' % ', '.join(having))
+            params.extend(h_params)
+            qn = self.quote_name_unless_alias
+            for (aggregate, lookup_type, value) in self.having:
+                if lookup_type == 'in':
+                    having.append('%s IN (%s)' % (aggregate.as_sql(quote_func=qn),
+                                                  ', '.join(['%s'] * len(value))))
+                    having_params.extend(value)
+                elif lookup_type == 'range':
+                    having.append('%s BETWEEN %%s and %%s' %
+                                  aggregate.as_sql(quote_func=qn))
+                    having_params.extend([value[0], value[1]])
+                elif lookup_type == 'isnull':
+                    having.append('%s IS %sNULL' % (aggregate.as_sql(quote_func=qn),
+                                                    (not value and "NOT " or '')))
+                else:
+                    if lookup_type not in connection.operators:
+                        raise TypeError('Invalid lookup_type: %r' % lookup_type)
+                    having.append('%s %s' % (aggregate.as_sql(quote_func=qn),
+                                             connection.operators[lookup_type]))
+                    having_params.append(value)
+            having_clause = 'HAVING ' + ' AND '.join(having)
+            result.append(having_clause)
         if ordering:
             result.append('ORDER BY %s' % ', '.join(ordering))
 …
                 result.append('OFFSET %d' % self.low_mark)
         params.extend(self.extra_params)
+        params.extend(having_params)
         return ' '.join(result), tuple(params)
     def combine(self, rhs, connector):
 …
             self.join((None, self.model._meta.db_table, None, None))
         if self.select_related and not self.related_select_cols:
             self.fill_related_selections()
+        if self.allow_nulls:
+            self.promote_all()
     def get_columns(self, with_aliases=False):
         """
 …
             for col in self.select:
                 if isinstance(col, (list, tuple)):
                     r = '%s.%s' % (qn(col[0]), qn(col[1]))
+                    if with_aliases and col[1] in col_aliases:
+                        c_alias = 'Col%d' % len(col_aliases)
+                        result.append('%s AS %s' % (r, c_alias))
+                        aliases.add(c_alias)
+                        col_aliases.add(c_alias)
+                    if with_aliases:
+                        if col[1] in col_aliases:
+                            c_alias = 'Col%d' % len(col_aliases)
+                            result.append('%s AS %s' % (r, c_alias))
+                            aliases.add(c_alias)
+                            col_aliases.add(c_alias)
+                        else:
+                            result.append('%s AS %s' % (r, col[1]))
+                            aliases.add(r)
+                            col_aliases.add(col[1])
                     else:
                         result.append(r)
                         aliases.add(r)
                         col_aliases.add(col[1])
                 else:
+                    result.append(col.as_sql(quote_func=qn))
+                    if hasattr(col, 'aliased_name'):
+                        result.append('%s AS %s' % (col.as_sql(quote_func=qn),
+                                                    col.aliased_name))
+                    else:
+                        result.append(col.as_sql(quote_func=qn))
                     if hasattr(col, 'alias'):
                         aliases.add(col.alias)
                         col_aliases.add(col.alias)
         elif self.default_cols:
             cols, new_aliases = self.get_default_columns(with_aliases,
                     col_aliases)
 …
             asc, desc = ORDER_DIR['ASC']
         else:
             asc, desc = ORDER_DIR['DESC']
         for field in ordering:
+            found = False
+            for aggregate in self.get_aggregate_list():
+                if aggregate.aliased_name in field:
+                    if field[0] == '-':
+                        order = desc
+                    else:
+                        order = asc
+                    result.append('%s %s' % (aggregate.as_sql(), order))
+                    found = True
+            if found:
+                continue
             if field == '?':
                 result.append(self.connection.ops.random_function_sql())
                 continue
 …
         """ Decreases the reference count for this alias. """
         self.alias_refcount[alias] -= 1
+    def promote_all(self):
+        """
+        Promotes every alias: calls promote_alias(alias, unconditional=True)
+        for each key of alias_map.
+        """
+        for alias in self.alias_map:
+            self.promote_alias(alias, unconditional=True)
     def promote_alias(self, alias, unconditional=False):
         """
         Promotes the join type of an alias to an outer join if it's possible
 …
             alias = self.join((None, self.model._meta.db_table, None, None))
         return alias
+    def is_aggregate(self, obj):
+        """
+        Returns True when obj is an instance of django.db.aggregates.Aggregate.
+        """
+        # Imported inside the method rather than at module level; presumably
+        # to avoid an import cycle -- TODO confirm.
+        from django.db.aggregates import Aggregate
+        return isinstance(obj, Aggregate)
+    def get_aggregate_list(self, attribute=None):
+        """
+        Returns the Aggregate instances found in self.select, in order.
+
+        When attribute is given (and truthy), returns that attribute of each
+        aggregate instead of the aggregate objects themselves.
+        """
+        from django.db.aggregates import Aggregate
+        if not attribute:
+            return [x for x in self.select if isinstance(x, Aggregate)]
+        else:
+            return [getattr(x, attribute) for x in self.select
+                    if isinstance(x, Aggregate)]
     def count_active_tables(self):
         """
         Returns the number of tables in this query with a non-zero reference
 …
             self.fill_related_selections(f.rel.to._meta, alias, cur_depth + 1,
                     used, next, restricted, new_nullable, dupe_set, avoid)
+    def add_aggregate(self, aggregate_expr, model):
+        """
+        Adds a single aggregate expression to the Query
+
+        Resolves aggregate_expr.lookup against model, stores the resulting
+        table alias and column on the expression (col_alias / column) and
+        appends the expression to self.select.
+        """
+        opts = model._meta
+        #Do not waste time in checking the joins if it's an aggregate
+        #on an annotation
+        if (self.group_by and aggregate_expr.reduce):
+            self.select.append(aggregate_expr)
+            return
+        field_list = aggregate_expr.lookup.split(LOOKUP_SEP)
+        # A multi-part lookup, or a name that is not one of the model's own
+        # fields, is resolved through setup_joins.
+        if (len(field_list) > 1 or
+            field_list[0] not in [i.name for i in opts.fields]):
+            # Only target and join_list are used from the returned tuple.
+            field, target, opts, join_list, last, _ = self.setup_joins(
+                field_list, opts, self.get_initial_alias(), False)
+            # Flag checked elsewhere in this patch to call promote_all().
+            self.allow_nulls = True
+            aggregate_expr.column = target.column
+            # NOTE(review): field_name is not read again on this branch.
+            field_name = field_list.pop()
+            aggregate_expr.col_alias = join_list[-1]
+        else:
+            # Local field: qualify the column with the model's own table.
+            field_name = field_list[0]
+            aggregate_expr.col_alias = opts.db_table
+            fields = dict([(field.name, field) for field in opts.fields])
+            aggregate_expr.column = fields[field_name].column
+        self.select.append(aggregate_expr)
     def add_filter(self, filter_expr, connector=AND, negate=False, trim=False,
             can_reuse=None, process_extras=True):
         """
 …
         alias = self.get_initial_alias()
         allow_many = trim or not negate
+        for i in self.get_aggregate_list():
+            if i.aliased_name == parts[0] :
+                self.having.append((i, lookup_type, value))
+                return
         try:
             field, target, opts, join_list, last, extra_filters = self.setup_joins(
                     parts, opts, alias, True, allow_many, can_reuse=can_reuse,
 …
         """
         return not (self.low_mark or self.high_mark)
     def add_fields(self, field_names, allow_m2m=True):
+    def add_fields(self, field_names, allow_m2m=True, rebuild=False):
         """
         Adds the given (model) fields to the select set. The field names are
         added in the order specified.
+        If rebuild is True, the field list is rebuilt from scratch
+        keeping only the aggregate objects.
         """
         alias = self.get_initial_alias()
         opts = self.get_meta()
+        aggregates = []
+        if rebuild:
+            aggregates = self.get_aggregate_list()
+            self.select = []
         try:
             for name in field_names:
                 field, target, u2, joins, u3, u4 = self.setup_joins(
 …
             names.sort()
             raise FieldError("Cannot resolve keyword %r into field. "
                     "Choices are: %s" % (name, ", ".join(names)))
+        self.select.extend(aggregates)
     def add_ordering(self, *ordering):
         """
 …
         if force_empty:
             self.default_ordering = False
+    def set_group_by(self):
+        """
+        Extends self.group_by from the current select list.
+
+        When the backend sets allows_group_by_pk and the select list has as
+        many entries as the model has fields, only '<db_table>.<pk column>'
+        is appended. Otherwise every non-aggregate select item is appended.
+        Entries are appended, never reset, so repeated calls accumulate.
+        """
+        if self.connection.features.allows_group_by_pk:
+            if len(self.select) == len(self.model._meta.fields):
+                #there might be problems with the aliases here. check.
+                self.group_by.append('.'.join([self.model._meta.db_table,
+                                               self.model._meta.pk.column]))
+                return
+        for sel in self.select:
+            if not self.is_aggregate(sel):
+                self.group_by.append(sel)
     def add_count_column(self):
         """
         Converts the query to do count(...) or count(distinct(pk)) in order to

django/db/models/sql/subqueries.py

     def get_ordering(self):
         return ()
+class AggregateQuery(Query):
+    """
+    An AggregateQuery takes another query as a parameter to the FROM
+    clause and only selects the elements in the provided list.
+    """
+    #CK Clean this
+    def add_select(self, select):
+        # Replaces the select list outright; as_sql() calls as_fold() on
+        # every item, so the items are expected to provide that method.
+        self.select = select
+    def add_subquery(self, query):
+        # Renders the inner query immediately and keeps its SQL and params.
+        self.subquery, self.sub_params = query.as_sql(with_col_aliases=True)
+    def as_sql(self, quote_func=None):
+        """
+        Creates the SQL for this query. Returns the SQL string and list of
+        parameters.
+
+        The parameters are exactly those of the sub-query; quote_func is
+        accepted but not used.
+        """
+        sql = ('SELECT %s FROM (%s) AS subquery' %
+               (', '.join([i.as_fold() for i in self.select]), self.subquery))
+        params = self.sub_params
+        return (sql, params)

django/db/models/manager.py

     def filter(self, *args, **kwargs):
         return self.get_query_set().filter(*args, **kwargs)
+    def aggregate(self, *args, **kwargs):
+        # Delegates to aggregate() on the manager's query set.
+        return self.get_query_set().aggregate(*args, **kwargs)
+    def annotate(self, *args, **kwargs):
+        # Delegates to annotate() on the manager's query set.
+        return self.get_query_set().annotate(*args, **kwargs)
     def complex_filter(self, *args, **kwargs):
         return self.get_query_set().complex_filter(*args, **kwargs)

django/db/models/query.py

     from sets import Set as set     # Python 2.3 fallback
 from django.db import connection, transaction, IntegrityError
+from django.db.aggregates import Aggregate
 from django.db.models.fields import DateField
 from django.db.models.query_utils import Q, select_related_descend
+from django.db.models.query_utils import Q, select_related_descend, _value_or_object
 from django.db.models import signals, sql
 from django.utils.datastructures import SortedDict
 …
         max_depth = self.query.max_depth
         extra_select = self.query.extra_select.keys()
         index_start = len(extra_select)
+        for row in self.query.results_iter():
+        for row in self.query.results_iter():
             if fill_cache:
                 obj, _ = get_cached_row(self.model, row, index_start,
                         max_depth, requested=requested)
+                obj, aggregate_start = get_cached_row(self.model, row,
+                                    index_start, max_depth, requested=requested)
             else:
+                obj = self.model(*row[index_start:])
+                aggregate_start = index_start + len(self.model._meta.fields)
+                #omit aggregates in object creation
+                obj = self.model(*row[index_start:aggregate_start])
             for i, k in enumerate(extra_select):
                 setattr(obj, k, row[i])
+            data_length = len(row)
+            if aggregate_start < data_length:
+                #the aggregate values retrieved from the backend
+                aggregate_values = [_value_or_object(row[i])
+                                    for i in range(aggregate_start, data_length)]
+                select =  self.query.extra_select.keys() + self.query.select
+                #Add the attributes to the model
+                new_values = dict(zip(
+                    [select[i].aliased_name
+                     for i in range(aggregate_start, len(select))],
+                    aggregate_values))
+                obj.__dict__.update(new_values)
             yield obj
+    def aggregate(self, *args, **kwargs):
+        """
+        Returns a dictionary containing the calculations (aggregation)
+        over the current queryset
+        If args is present the expression is passed as a kwarg using
+        the Aggregate object's default alias.
+        """
+        # Positional aggregates are keyed by their default aliased_name; a
+        # keyword argument with the same name is overwritten.
+        for arg in args:
+            kwargs[arg.aliased_name] = arg
+        for (alias, aggregate_expr) in kwargs.items():
+            # The expression objects passed by the caller are modified here.
+            aggregate_expr.aliased_name = alias
+            # reduce is read by Query.add_aggregate and get_aggregation.
+            aggregate_expr.reduce = True
+            # NOTE(review): the aggregate is added to self.query itself, not
+            # to a clone (annotate() works on a clone) -- confirm that
+            # changing this queryset's query is intended.
+            self.query.add_aggregate(aggregate_expr, self.model)
+        return self.query.get_aggregation()
     def count(self):
         """
         Performs a SELECT COUNT() and returns the number of records as an
 …
         """
         self.query.select_related = other.query.select_related
+    def annotate(self, *args, **kwargs):
+        self.return_groups = kwargs.get('grouped_objects')
+        try:
+            del kwargs['grouped_objects']
+        except:
+            pass
+        for arg in args:
+            kwargs[arg.aliased_name] = arg
+        opts = self.model._meta
+        obj = self._clone(return_groups=self.return_groups)
+        if isinstance(obj, ValuesQuerySet):
+            obj.query.set_group_by()
+            #obj.query.group_by.extend(obj.query.select[:])
+        if not obj.query.group_by:
+            field_names = [f.attname for f in opts.fields]
+            obj.query.add_fields(field_names, False)
+            obj.query.set_group_by()
+        for (alias, aggregate_expr) in kwargs.items():
+            aggregate_expr.aliased_name = alias
+            aggregate_expr.reduce = False
+            obj.query.add_aggregate(aggregate_expr, self.model)
+        return obj
     def order_by(self, *field_names):
         """
         Returns a new QuerySet instance with the ordering changed.
 …
         """
         pass
 class ValuesQuerySet(QuerySet):
     def __init__(self, *args, **kwargs):
         super(ValuesQuerySet, self).__init__(*args, **kwargs)
 …
             len(self.field_names) != len(self.model._meta.fields)):
             self.query.trim_extra_select(self.extra_names)
         names = self.query.extra_select.keys() + self.field_names
+        names.extend([x.aliased_name for x in self.query.select
+                      if isinstance(x, Aggregate)])
+        aggregate_start = len(self._fields) or len(self.model._meta.fields)
         for row in self.query.results_iter():
+            yield dict(zip(names, row))
+            normalized_row = list(row)
+            for i in range(aggregate_start, len(normalized_row)):
+                normalized_row[i] = _value_or_object(normalized_row[i])
+            num_fields = len(self.model._meta.fields)
+            has_grouping = (len(row) > aggregate_start and
+                            len(self.field_names) < num_fields and
+                            len(self.query.group_by) < num_fields)
+            #Grouped objects QuerySet
+            if (hasattr(self, 'return_groups') and self.return_groups):
+                restrictions = dict(zip(names, normalized_row[:aggregate_start]))
+                group_query = self.model.objects.filter(**restrictions)
+                yield (dict(zip(names, normalized_row)), group_query)
+            else:
+                yield dict(zip(names, normalized_row))
     def _setup_query(self):
         """
         Constructs the field_names list that the values query will be
 …
         Called by the _clone() method after initializing the rest of the
         instance.
         """
+        """
         self.extra_names = []
         if self._fields:
             if not self.query.extra_select:
 …
             # Default to all fields.
             field_names = [f.attname for f in self.model._meta.fields]
         self.query.add_fields(field_names, False)
+        self.query.add_fields(field_names, False, rebuild=True)
         self.query.default_cols = False
         self.field_names = field_names

django/db/models/query_utils.py

         return False
     return True
+def _value_or_object(obj):
+    try:
+        return float(obj)
+    except:
+        return obj

django/db/backends/mysql/base.py

 class DatabaseFeatures(BaseDatabaseFeatures):
     empty_fetchmany_value = ()
     update_can_self_select = False
+    allows_group_by_pk = True
     related_fields_match_type = True
 class DatabaseOperations(BaseDatabaseOperations):

django/db/backends/__init__.py

         return util.CursorDebugWrapper(cursor, self)
 class BaseDatabaseFeatures(object):
+    allows_group_by_pk = False
     # True if django.db.backend.utils.typecast_timestamp is used on values
     # returned from dates() calls.
     needs_datetime_string_cast = True

tests/modeltests/aggregation/fixtures/initial_data.json

+[
+ {
+  "pk": 1,
+  "model": "aggregation.publisher",
+  "fields": {
+   "name": "Apress ",
+   "num_awards": 3
+  }
+ },
+ {
+  "pk": 2,
+  "model": "aggregation.publisher",
+  "fields": {
+   "name": "Sams",
+   "num_awards": 1
+  }
+ },
+ {
+  "pk": 3,
+  "model": "aggregation.publisher",
+  "fields": {
+   "name": "Prentice Hall",
+   "num_awards": 7
+  }
+ },
+ {
+  "pk": 4,
+  "model": "aggregation.publisher",
+  "fields": {
+   "name": "Morgan Kaufmann",
+   "num_awards": 9
+  }
+ },
+ {
+  "pk": 1,
+  "model": "aggregation.book",
+  "fields": {
+   "publisher": 1,
+   "isbn": "159059725",
+   "name": "The Definitive Guide to Django: Web Development Done Right",
+   "price": 30.0,
+   "authors": [
+    1,
+    2
+   ],
+   "pages": 447
+  }
+ },
+ {
+  "pk": 2,
+  "model": "aggregation.book",
+  "fields": {
+   "publisher": 2,
+   "isbn": "067232959",
+   "name": "Sams Teach Yourself Django in 24 Hours",
+   "price": 23.09,
+   "authors": [
+    3
+   ],
+   "pages": 528
+  }
+ },
+ {
+  "pk": 3,
+  "model": "aggregation.book",
+  "fields": {
+   "publisher": 1,
+   "isbn": "159059996",
+   "name": "Practical Django Projects",
+   "price": 29.69,
+   "authors": [
+    4
+   ],
+   "pages": 300
+  }
+ },
+ {
+  "pk": 4,
+  "model": "aggregation.book",
+  "fields": {
+   "publisher": 3,
+   "isbn": "013235613",
+   "name": "Python Web Development with Django",
+   "price": 29.69,
+   "authors": [
+    5,
+    6,
+    7
+   ],
+   "pages": 350
+  }
+ },
+ {
+  "pk": 5,
+  "model": "aggregation.book",
+  "fields": {
+   "publisher": 3,
+   "isbn": "013790395",
+   "name": "Artificial Intelligence: A Modern Approach",
+   "price": 82.8,
+   "authors": [
+    8,
+    9
+   ],
+   "pages": 1132
+  }
+ },
+ {
+  "pk": 6,
+  "model": "aggregation.book",
+  "fields": {
+   "publisher": 4,
+   "isbn": "155860191",
+   "name": "Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp",
+   "price": 75.0,
+   "authors": [
+    8
+   ],
+   "pages": 946
+  }
+ },
+ {
+  "pk": 1,
+  "model": "aggregation.store",
+  "fields": {
+   "books": [
+    1,
+    2,
+    3,
+    4,
+    5,
+    6
+   ],
+   "name": "Amazon.com"
+  }
+ },
+ {
+  "pk": 2,
+  "model": "aggregation.store",
+  "fields": {
+   "books": [
+,
+,
+,
+   ],
+   "name": "Books.com"
+  }
+ },
+ {
+  "pk": 3,
+  "model": "aggregation.store",
+  "fields": {
+   "books": [
+,
+,
+   ],
+   "name": "Mamma and Pappa's Books"
+  }
+ },
+ {
+  "pk": 1,
+  "model": "aggregation.author",
+  "fields": {
+   "age": 34,
+   "friends": [
+,
+   ],
+   "name": "Adrian Holovaty"
+  }
+ },
+ {
+  "pk": 2,
+  "model": "aggregation.author",
+  "fields": {
+   "age": 35,
+   "friends": [
+,
+   ],
+   "name": "Jacob Kaplan-Moss"
+  }
+ },
+ {
+  "pk": 3,
+  "model": "aggregation.author",
+  "fields": {
+   "age": 45,
+   "friends": [],
+   "name": "Brad Dayley"
+  }
+ },
+ {
+  "pk": 4,
+  "model": "aggregation.author",
+  "fields": {
+   "age": 29,
+   "friends": [
+   ],
+   "name": "James Bennett"
+  }
+ },
+ {
+  "pk": 5,
+  "model": "aggregation.author",
+  "fields": {
+   "age": 37,
+   "friends": [
+,
+   ],
+   "name": "Jeffrey Forcier "
+  }
+ },
+ {
+  "pk": 6,
+  "model": "aggregation.author",
+  "fields": {
+   "age": 29,
+   "friends": [
+,
+   ],
+   "name": "Paul Bissex"
+  }
+ },
+ {
+  "pk": 7,
+  "model": "aggregation.author",
+  "fields": {
+   "age": 25,
+   "friends": [
+,
+,
+   ],
+   "name": "Wesley J. Chun"
+  }
+ },
+ {
+  "pk": 8,
+  "model": "aggregation.author",
+  "fields": {
+   "age": 57,
+   "friends": [
+   ],
+   "name": "Peter Norvig"
+  }
+ },
+ {
+  "pk": 9,
+  "model": "aggregation.author",
+  "fields": {
+   "age": 46,
+   "friends": [
+   ],
+   "name": "Stuart Russell"
+  }
+ }
+]

tests/modeltests/aggregation/models.py

+# coding: utf-8
+from django.db import models
+class Author(models.Model):
+   # Test model: an author with a name, an age and a self-referential
+   # many-to-many `friends` relation.
+   name = models.CharField(max_length=100)
+   age = models.IntegerField()
+   friends = models.ManyToManyField('self', blank=True)
+   def __unicode__(self):
+      return self.name
+class Publisher(models.Model):
+   # Test model: a publisher with a name and an award count.
+   name = models.CharField(max_length=300)
+   num_awards = models.IntegerField()
+   def __unicode__(self):
+      return self.name
+class Book(models.Model):
+   # Test model: a book with many authors (many-to-many) and exactly one
+   # publisher (foreign key).
+   isbn = models.CharField(max_length=9)
+   name = models.CharField(max_length=300)
+   pages = models.IntegerField()
+   price = models.FloatField()
+   authors = models.ManyToManyField(Author)
+   publisher = models.ForeignKey(Publisher)
+   def __unicode__(self):
+      return self.name
+class Store(models.Model):
+   # Test model: a store stocking many books (many-to-many).
+   name = models.CharField(max_length=300)
+   books = models.ManyToManyField(Book)
+   def __unicode__(self):
+      return self.name
+class Entries(models.Model):
+   # The primary key is stored in a column whose name contains a space
+   # ('Entry ID'); presumably a case for identifier quoting -- TODO confirm.
+   EntryID = models.AutoField(primary_key=True, db_column='Entry ID')
+   Entry = models.CharField(unique=True, max_length=50)
+   Exclude = models.BooleanField()
+class Clues(models.Model):
+   # Each clue points at an Entries row through a foreign key stored in a
+   # column also named 'Entry ID'.
+   ID = models.AutoField(primary_key=True)
+   EntryID = models.ForeignKey(Entries, verbose_name='Entry', db_column = 'Entry ID')
+   Clue = models.CharField(max_length=150)
+# Tests on 'aggregate'
+# Different backends and numbers.
+__test__ = {'API_TESTS': """
+>>> from django.core import management
+# Reset the database representation of this app.
+# This will return the database to a clean initial state.
+>>> management.call_command('flush', verbosity=0, interactive=False)
+# Empty Call
+>>> Author.objects.all().aggregate()
+{}
+>>> from django.db.aggregates import Avg, Sum, Count, Max, Min
+# Note that rounding of floating points is being used for the tests to
+# pass for all backends
+# Single model aggregation
+#
+# Simple
+# Average Author age
+>>> Author.objects.all().aggregate(Avg('age'))
+{'age__avg': 37.4...}
+# Multiple
+# Average and Sum of Author's age
+>>> Author.objects.all().aggregate(Sum('age'), Avg('age'))
+{'age__sum': 337.0, 'age__avg': 37.4...}
+# After applying other modifiers
+# Sum of the age of those older than 29 years old
+>>> Author.objects.all().filter(age__gt=29).aggregate(Sum('age'))
+{'age__sum': 254.0}
+# Depth-1 Joins
+#
+# On Relationships with self
+# Average age of those with friends (not exactly.
+# That would be: Author.objects.all().exclude(friends=None).aggregate(Avg('age')))
+>>> Author.objects.all().aggregate(Avg('friends__age'))
+{'friends__age__avg': 34.07...}
+# On ManyToMany Relationships
+#
+# Forward
+# Average age of the Authors of Books that cost less than 50 USD
+>>> Book.objects.all().filter(price__lt=50).aggregate(Avg('authors__age'))
+{'authors__age__avg': 33.42...}
+# Backward
+# Average price of the Books whose Author's name contains the letter 'a'
+>>> Author.objects.all().filter(name__contains='a').aggregate(Avg('book__price'))
+{'book__price__avg': 37.54...}
+# On OneToMany Relationships
+#
+# Forward
+# Sum of the number of awards of each Book's Publisher
+>>> Book.objects.all().aggregate(Sum('publisher__num_awards'))
+{'publisher__num_awards__sum': 30.0}
+# Backward
+# Sum of the price of every Book that has a Publisher
+>>> Publisher.objects.all().aggregate(Sum('book__price'))
+{'book__price__sum': 270.269...}
+# Multiple Joins
+#
+#Forward
+>>> Store.objects.all().aggregate(Max('books__authors__age'))
+{'books__authors__age__max': 57.0}
+#Backward
+>>> Author.objects.all().aggregate(Min('book__publisher__num_awards'))
+{'book__publisher__num_awards__min': 1.0}
+# You can also use aliases.
+#
+# Average amazon.com Book price
+>>> Store.objects.filter(name='Amazon.com').aggregate(amazon_mean=Avg('books__price'))
+{'amazon_mean': 45.04...}
+# Tests on annotate()
+#
+# An empty annotate call does nothing but return the same QuerySet
+>>> Book.objects.all().annotate().order_by('pk')
+[<Book: The Definitive Guide to Django: Web Development Done Right>, <Book: Sams Teach Yourself Django in 24 Hours>, <Book: Practical Django Projects>, <Book: Python Web Development with Django>, <Book: Artificial Intelligence: A Modern Approach>, <Book: Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp>]
+#Annotate inserts the alias into the model object with the aggregated result
+>>> books = Book.objects.all().annotate(mean_age=Avg('authors__age'))
+>>> books.get(pk=1).name
+u'The Definitive Guide to Django: Web Development Done Right'
+>>> books.get(pk=1).mean_age
+34.5
+#Calls to values() are not commutative over annotate().
+#Calling values on a queryset that has annotations returns the output
+#as a dictionary
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values()
+[{'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'price': 30.0, 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}]
+#Calling it with parameters reduces the output but does not remove the
+#annotation.
+>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values('name')
+[{'name': u'The Definitive Guide to Django: Web Development Done Right', 'mean_age': 34.5}]
+#An empty values() call before annotating has the same effect as an
+#empty values() call after annotating
+>>> Book.objects.filter(pk=1).values().annotate(mean_age=Avg('authors__age'))
+[{'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'price': 30.0, 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}]
+#Calling annotate() on a ValuesQuerySet annotates over the groups of
+#fields to be selected by the ValuesQuerySet.
+#Note that an extra parameter is added to each dictionary. This
+#parameter is a queryset representing the objects that have been
+#grouped to generate the annotation
+>>> Book.objects.all().values('price').annotate(number=Count('authors__id'), mean_age=Avg('authors__age')).order_by('price')
+[{'price': 23.09, 'number': 1.0, 'mean_age': 45.0}, {'price': 29.690000000000001, 'number': 4.0, 'mean_age': 30.0}, {'price': 30.0, 'number': 2.0, 'mean_age': 34.5}, {'price': 75.0, 'number': 1.0, 'mean_age': 57.0}, {'price': 82.799999999999997, 'number': 2.0, 'mean_age': 51.5}]
+#Notice that the output includes all Authors but the value of the aggregation
+#is None for those that have no friends.
+#(consider having a neutral ('zero') element for each operation)
+>>> authors = Author.objects.all().annotate(Avg('friends__age')).order_by('id')
+>>> len(authors)
+9
+>>> for i in authors:
+...     print i.name, i.friends__age__avg
+...
+Adrian Holovaty 32.0
+Jacob Kaplan-Moss 29.5
+Brad Dayley None
+James Bennett 34.0
+Jeffrey Forcier  27.0
+Paul Bissex 31.0
+Wesley J. Chun 33.66...
+Peter Norvig 46.0
+Stuart Russell 57.0
+#The Count aggregation function allows an extra parameter: distinct.
+#
+>>> Book.objects.all().aggregate(Count('price'))
+{'price__count': 6.0}
+>>> Book.objects.all().aggregate(Count('price', distinct=True))
+{'price__count': 5.0}
+#Retrieving the grouped objects
+#When using Count you can also omit the primary key and refer only to
+#the related field name if you want to count all the related objects
+#and not a specific column
+>>> explicit = list(Author.objects.annotate(Count('book__id')))
+>>> implicit = list(Author.objects.annotate(Count('book')))
+>>> explicit == implicit
+True
+##
+# Ordering is allowed on aggregates
+>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('oldest')
+[{'price': 30.0, 'oldest': 35.0}, {'price': 29.6..., 'oldest': 37.0}, {'price': 23.09, 'oldest': 45.0}, {'price': 75.0, 'oldest': 57.0}, {'price': 82.7..., 'oldest': 57.0}]
+>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('-oldest')
+[{'price': 75.0, 'oldest': 57.0}, {'price': 82.7..., 'oldest': 57.0}, {'price': 23.09, 'oldest': 45.0}, {'price': 29.6..., 'oldest': 37.0}, {'price': 30.0, 'oldest': 35.0}]
+>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('-oldest', 'price')
+[{'price': 75.0, 'oldest': 57.0}, {'price': 82.7..., 'oldest': 57.0}, {'price': 23.09, 'oldest': 45.0}, {'price': 29.6..., 'oldest': 37.0}, {'price': 30.0, 'oldest': 35.0}]
+>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('-oldest', '-price')
+[{'price': 82.7..., 'oldest': 57.0}, {'price': 75.0, 'oldest': 57.0}, {'price': 23.09, 'oldest': 45.0}, {'price': 29.6..., 'oldest': 37.0}, {'price': 30.0, 'oldest': 35.0}]
+# It is possible to aggregate over annotated values
+#
+>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Avg('num_authors'))
+{'num_authors__avg': 1.66...}
+# You can filter the results based on the aggregation alias.
+#
+#Let's add a publisher to test the different possibilities for filtering
+>>> p = Publisher(name='Expensive Publisher', num_awards=0)
+>>> p.save()
+>>> Book(name='ExpensiveBook1', pages=1, isbn='111', price=1000, publisher=p).save()
+>>> Book(name='ExpensiveBook2', pages=1, isbn='222', price=1000, publisher=p).save()
+>>> Book(name='ExpensiveBook3', pages=1, isbn='333', price=35, publisher=p).save()
+#Consider the following queries:
+#Publishers that have:
+#(i) more than one book
+>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
+[<Publisher: Apress >, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
+#(ii) a book that cost less than 40
+>>> Publisher.objects.filter(book__price__lt=40).order_by('pk')
+[<Publisher: Apress >, <Publisher: Apress >, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
+#(iii) more than one book and (at least) a book that cost less than 40
+>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1, book__price__lt=40).order_by('pk')
+[<Publisher: Apress >, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
+#(iv) more than one book that costs less than 40
+>>> Publisher.objects.filter(book__price__lt=40).annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
+[<Publisher: Apress >]
+# Now a bit of testing on the different lookup types
+#
+>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 3]).order_by('pk')
+[<Publisher: Apress >, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Morgan Kaufmann>, <Publisher: Expensive Publisher>]
+>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 2]).order_by('pk')
+[<Publisher: Apress >, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Morgan Kaufmann>]
+>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__in=[1, 3]).order_by('pk')
+[<Publisher: Sams>, <Publisher: Morgan Kaufmann>, <Publisher: Expensive Publisher>]
+>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__isnull=True)
+[]
+>>> p.delete()
+# Community tests
+#
+#Thanks to Russell for the following set
+#
+#Does Author X have any friends? (or better, how many friends does author X have)
+>>> Author.objects.filter(pk=1).aggregate(Count('friends__id'))
+{'friends__id__count': 2.0}
+#Give me a list of all Books with more than 1 authors
+>>> Book.objects.all().annotate(num_authors=Count('authors__name')).filter(num_authors__ge=2).order_by('pk')
+[<Book: The Definitive Guide to Django: Web Development Done Right>, <Book: Artificial Intelligence: A Modern Approach>]
+#Give me a list of all Authors that have no friends
+>>> Author.objects.all().annotate(num_friends=Count('friends__id', distinct=True)).filter(num_friends=0).order_by('pk')
+[<Author: Brad Dayley>]
+#Give me a list of all publishers that have published more than 1 books
+>>> Publisher.objects.all().annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
+[<Publisher: Apress >, <Publisher: Prentice Hall>]
+#Give me a list of all publishers that have published more than 1 books that cost less than 30
+#>>> Publisher.objects.all().filter(book__price__lt=40).annotate(num_books=Count('book__id')).filter(num_books__gt=1)
+[<Publisher: Apress >]
+#Give me a list of all Books that were written by X and one other author.
+>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1)
+[<Book: Artificial Intelligence: A Modern Approach>]
+#Give me the average price of all Books that were written by X and one other author.
+#(Aggregate over objects discovered using membership of the m2m set)
+#Adding an existing author to another book to test it the right way
+>>> a = Author.objects.get(name__contains='Norvig')
+>>> b = Book.objects.get(name__contains='Done Right')
+>>> b.authors.add(a)
+>>> b.save()
+#This should do it
+>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1).aggregate(Avg('price'))
+{'price__avg': 56.39...}
+>>> b.authors.remove(a)
+#
+# --- Just one of the hard ones left ---
+#
+#Give me a list of all Authors that have published a book with at least one other person
+#(Filters over a count generated on a related object)
+#
+# Cheating: [a for a in Author.objects.all().annotate(num_coleagues=Count('book__authors__id'), num_books=Count('book__id', distinct=True)) if a.num_coleagues - a.num_books > 0]
+# F-Syntax is required. Will be fixed after F objects are available
+#Thanks to Karen for the following set
+# Tests on fields with different names and spaces. (but they work =) )
+>>> Clues.objects.values('EntryID__Entry').annotate(Appearances=Count('EntryID'), Distinct_Clues=Count('Clue', distinct=True))
+[]
+"""}

tests/regressiontests/aggregation_regress/fixtures/initial_data.json

+[
+ {
+  "pk": 1,
+  "model": "aggregation_regress.publisher",
+  "fields": {
+   "name": "Apress ",
+   "num_awards": 3
+  }
+ },
+ {
+  "pk": 2,
+  "model": "aggregation_regress.publisher",
+  "fields": {
+   "name": "Sams",
+   "num_awards": 1
+  }
+ },
+ {
+  "pk": 3,
+  "model": "aggregation_regress.publisher",
+  "fields": {
+   "name": "Prentice Hall",
+   "num_awards": 7
+  }
+ },
+ {
+  "pk": 4,
+  "model": "aggregation_regress.publisher",
+  "fields": {
+   "name": "Morgan Kaufmann",
+   "num_awards": 9
+  }
+ },
+ {
+  "pk": 1,
+  "model": "aggregation_regress.book",
+  "fields": {
+   "publisher": 1,
+   "isbn": "159059725",
+   "name": "The Definitive Guide to Django: Web Development Done Right",
+   "price": 30.0,
+   "authors": [
+,
+   ],
+   "pages": 447
+  }
+ },
+ {
+  "pk": 2,
+  "model": "aggregation_regress.book",
+  "fields": {
+   "publisher": 2,
+   "isbn": "067232959",
+   "name": "Sams Teach Yourself Django in 24 Hours",
+   "price": 23.09,
+   "authors": [
+   ],
+   "pages": 528
+  }
+ },
+ {
+  "pk": 3,
+  "model": "aggregation_regress.book",
+  "fields": {
+   "publisher": 1,
+   "isbn": "159059996",
+   "name": "Practical Django Projects",
+   "price": 29.69,
+   "authors": [
+   ],
+   "pages": 300
+  }
+ },
+ {
+  "pk": 4,
+  "model": "aggregation_regress.book",
+  "fields": {
+   "publisher": 3,
+   "isbn": "013235613",
+   "name": "Python Web Development with Django",
+   "price": 29.69,
+   "authors": [
+,
+,
+   ],
+   "pages": 350
+  }
+ },
+ {
+  "pk": 5,
+  "model": "aggregation_regress.book",
+  "fields": {
+   "publisher": 3,
+   "isbn": "013790395",
+   "name": "Artificial Intelligence: A Modern Approach",
+   "price": 82.8,
+   "authors": [
+,
+   ],
+   "pages": 1132
+  }
+ },
+ {
+  "pk": 6,
+  "model": "aggregation_regress.book",
+  "fields": {
+   "publisher": 4,
+   "isbn": "155860191",
+   "name": "Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp",
+   "price": 75.0,
+   "authors": [
+   ],
+   "pages": 946
+  }
+ },
+ {
+  "pk": 1,
+  "model": "aggregation_regress.store",
+  "fields": {
+   "books": [
+,
+,
+,
+,
+,
+   ],
+   "name": "Amazon.com"
+  }
+ },
+ {
+  "pk": 2,
+  "model": "aggregation_regress.store",
+  "fields": {
+   "books": [
+,
+,
+,
+   ],
+   "name": "Books.com"
+  }
+ },
+ {
+  "pk": 3,
+  "model": "aggregation_regress.store",
+  "fields": {
+   "books": [
+,
+,
+   ],
+   "name": "Mamma and Pappa's Books"
+  }
+ },
+ {
+  "pk": 1,
+  "model": "aggregation_regress.author",
+  "fields": {
+   "age": 34,
+   "friends": [
+,
+   ],
+   "name": "Adrian Holovaty"
+  }
+ },
+ {
+  "pk": 2,
+  "model": "aggregation_regress.author",
+  "fields": {
+   "age": 35,
+   "friends": [
+,
+   ],
+   "name": "Jacob Kaplan-Moss"
+  }
+ },
+ {
+  "pk": 3,
+  "model": "aggregation_regress.author",
+  "fields": {
+   "age": 45,
+   "friends": [],
+   "name": "Brad Dayley"
+  }
+ },
+ {
+  "pk": 4,
+  "model": "aggregation_regress.author",
+  "fields": {
+   "age": 29,
+   "friends": [
+   ],
+   "name": "James Bennett"
+  }
+ },
+ {
+  "pk": 5,
+  "model": "aggregation_regress.author",
+  "fields": {
+   "age": 37,
+   "friends": [
+,
+   ],
+   "name": "Jeffrey Forcier "
+  }
+ },
+ {
+  "pk": 6,
+  "model": "aggregation_regress.author",
+  "fields": {
+   "age": 29,
+   "friends": [
+,
+   ],
+   "name": "Paul Bissex"
+  }
+ },
+ {
+  "pk": 7,
+  "model": "aggregation_regress.author",
+  "fields": {
+   "age": 25,
+   "friends": [
+,
+,
+   ],
+   "name": "Wesley J. Chun"
+  }
+ },
+ {
+  "pk": 8,
+  "model": "aggregation_regress.author",
+  "fields": {
+   "age": 57,
+   "friends": [
+   ],
+   "name": "Peter Norvig"
+  }
+ },
+ {
+  "pk": 9,
+  "model": "aggregation_regress.author",
+  "fields": {
+   "age": 46,
+   "friends": [
+   ],
+   "name": "Stuart Russell"
+  }
+ }
+]

tests/regressiontests/aggregation_regress/models.py

+# coding: utf-8
+from django.db import models
+class Author(models.Model):
+   name = models.CharField(max_length=100)
+   age = models.IntegerField()
+   friends = models.ManyToManyField('self', blank=True)
+   def __unicode__(self):
+      return self.name
+   class Admin:
+      pass
+class Publisher(models.Model):
+   name = models.CharField(max_length=300)
+   num_awards = models.IntegerField()
+   def __unicode__(self):
+      return self.name
+   class Admin:
+      pass
+class Book(models.Model):
+   isbn = models.CharField(max_length=9)
+   name = models.CharField(max_length=300)
+   pages = models.IntegerField()
+   price = models.FloatField()
+   authors = models.ManyToManyField(Author)
+   publisher = models.ForeignKey(Publisher)
+   def __unicode__(self):
+      return self.name
+   class Admin:
+      pass
+class Store(models.Model):
+   name = models.CharField(max_length=300)
+   books = models.ManyToManyField(Book)
+   def __unicode__(self):
+      return self.name
+   class Admin:
+      pass
+#Extra does not play well with values. Modify the tests if/when this is fixed.
+__test__ = {'API_TESTS': """
+>>> from django.core import management
+>>> from django.db.models import get_app
+# Reset the database representation of this app.
+# This will return the database to a clean initial state.
+>>> management.call_command('flush', verbosity=0, interactive=False)
+>>> from django.db.aggregates import Avg, Sum, Count, Max, Min
+>>> Book.objects.all().aggregate(Sum('pages'), Avg('pages'))
+{'pages__sum': 3703.0, 'pages__avg': 617.1...}
+>>> Book.objects.all().values().aggregate(Sum('pages'), Avg('pages'))
+{'pages__sum': 3703.0, 'pages__avg': 617.1...}
+>>> Book.objects.all().extra(select={'price_per_page' : 'price / pages'}).aggregate(Sum('pages'))
+{'pages__sum': 3703.0}
+>>> Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).get(pk=1).__dict__
+{'mean_auth_age': 34.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'price_per_page': 0.067..., 'price': 30.0, 'id': 1, 'publisher_id': 1, 'pages': 447}
+>>> Book.objects.all().extra(select={'price_per_page' : 'price / pages'}).annotate(mean_auth_age=Avg('authors__age')).get(pk=1).__dict__
+{'mean_auth_age': 34.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'price_per_page': 0.067..., 'price': 30.0, 'id': 1, 'publisher_id': 1, 'pages': 447}
+>>> Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).values().get(pk=1)
+{'mean_auth_age': 34.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'price_per_page': 0.067..., 'price': 30.0, 'id': 1, 'publisher_id': 1.0, 'pages': 447}
+>>> Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).values('name').get(pk=1)
+{'mean_auth_age': 34.5, 'name': u'The Definitive Guide to Django: Web Development Done Right'}
+>>> Book.objects.all().values().annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).get(pk=1)
+{'mean_auth_age': 34.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'price_per_page': 0.067..., 'price': 30.0, 'id': 1, 'publisher_id': 1.0, 'pages': 447}
+>>> Book.objects.all().values('name').annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).get(pk=1)
+{'mean_auth_age': 34.5, 'name': u'The Definitive Guide to Django: Web Development Done Right'}
+#Check that all of the objects are getting counted (allow_nulls) and that values respects the amount of objects
+>>> len(Author.objects.all().annotate(Avg('friends__age')).values())
+9
+#Check that consecutive calls to annotate don't break group by
+>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('oldest').annotate(Max('publisher__num_awards'))
+[{'price': 30.0, 'oldest': 35.0, 'publisher__num_awards__max': 3.0}, {'price': 29.69..., 'oldest': 37.0, 'publisher__num_awards__max': 7.0}, {'price': 23.09, 'oldest': 45.0, 'publisher__num_awards__max': 1.0}, {'price': 75.0, 'oldest': 57.0, 'publisher__num_awards__max': 9.0}, {'price': 82.7..., 'oldest': 57.0, 'publisher__num_awards__max': 7.0}]
+#Checks fixed bug with multiple aggregate objects in the aggregate call
+>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Max('price'), Sum('num_authors'))
+{'num_authors__sum': 10.0, 'price__max': 82.7...}
+"""
+}

tests/regressiontests/queries/models.py

 # An empty values() call includes all aliases, including those from an extra()
 >>> dicts = qs.values().order_by('id')
 >>> [sorted(d.items()) for d in dicts]
 [[('author_id', 2), ('good', 0), ('id', 1), ('rank', 2)], [('author_id', 3), ('good', 0), ('id', 2), ('rank', 1)], [('author_id', 1), ('good', 1), ('id', 3), ('rank', 3)]]
+[[('author_id', 2.0), ('good', 0), ('id', 1), ('rank', 2)], [('author_id', 3.0), ('good', 0), ('id', 2), ('rank', 1)], [('author_id', 1.0), ('good', 1), ('id', 3), ('rank', 3)]]
 Bugs #2874, #3002
 >>> qs = Item.objects.select_related().order_by('note__note', 'name')
 …
 >>> len([x[2] for x in q.alias_map.values() if x[2] == q.LOUTER and q.alias_refcount[x[1]]])
-A check to ensure we don't break the internal query construction of GROUP BY
-and HAVING. These aren't supported in the public API, but the Query class knows
-about them and shouldn't do bad things.
->>> qs = Tag.objects.values_list('parent_id', flat=True).order_by()
->>> qs.query.group_by = ['parent_id']
->>> qs.query.having = ['count(parent_id) > 1']
->>> expected = [t3.parent_id, t4.parent_id]
->>> expected.sort()
->>> result = list(qs)
->>> result.sort()
->>> expected == result
-True
 """}
 # In Python 2.3 and the Python 2.6 beta releases, exceptions raised in __len__

AUTHORS

     AgarFu <heaven@croasanaso.sytes.net>
     Dagur Páll Ammendrup <dagurp@gmail.com>
     Collin Anderson <cmawebsite@gmail.com>
+    Nicolas Lara <nicolaslara@gmail.com>
     Jeff Anderson <jefferya@programmerq.net>
     Andreas
     andy@jadedplanet.net

docs/aggregation.txt

+=============
+ Aggregation
+=============
+**New in Django development version**
+Aggregation works on top of an existing QuerySet allowing you to do
+calculations over sets of objects at the database level.
+The calculations to be retrieved are expressed using Aggregate
+objects.
+Aggregate objects
+=================
+Aggregate objects define the correspondence between the lookup being
+done in ORM syntax and the query that is executed in the backend.
+All Aggregate objects take the field to be aggregated upon as a string
+and an optional alias to represent the calculation in the result.
+Field representations are done in the same way as in field
+lookups. For example::
+    Max(total_price='price')
+would represent the maximum price of the selected objects in the model
+aggregated upon and it would be referred to as "total_price" while::
+    Avg(mean_age='friends__age')
+would represent the average of the related field for all the objects
+related to the objects in the model aggregated upon, likewise referred
+to as "mean_age".
+There are many possible ways to use aggregation so the actual results
+of different lookups will become clearer in the documentation for
+``aggregate()`` and ``annotate()``.
+If the alias is not present a default alias is defined according to
+each Aggregate object. So::
+     Min('friend__height')
+would be referred to as "friend__height__min".
+For every aggregate object that spans multiple models, if the field of
+the related model to be used in the aggregation is not specified, the
+field defaults to the primary key.
+Aggregate objects are located at ``django.db.aggregates``. Every
+aggregate object is a subclass of ``Aggregate``. An empty subclass of
+aggregate called Func could be used like this::
+          Func('field')
+and have the following SQL equivalent::
+    SELECT FUNC(field) as field__func ...
+The following Aggregate subclasses are pre-defined:
+Max
+---
+Calculates the maximum on the given field.
+Default alias: ``field__max``
+Min
+---
+Calculates the minimum on the given field.
+Default alias: ``field__min``
+Avg
+---
+Calculates the average on the given field.
+Default alias: ``field__avg``
+Sum
+---
+Calculates the summation on the given field.
+Default alias: ``field__sum``
+Count
+-----
+Counts the objects in which the field is not "Null". For counting
+regardless of the field please refer to `count()`_.
+Count takes an optional parameter: *distinct*.
+Distinct, if True, reduces the output counting repetitions on a field only once.
+If distinct is True
+   Count(field, distinct=True)
+has the SQL equivalent:
+    COUNT(DISTINCT field)
+otherwise it is:
+    COUNT(field)
+Default alias: ``field__count``
+.. _count(): ../db-api/#count
+Methods that do aggregation
+===========================
+For this section we'll refer to the following models::
+    class Author(models.Model):
+       name = models.CharField(max_length=100)
+       age = models.IntegerField()
+       friends = models.ManyToManyField('self', blank=True)
+    class Publisher(models.Model):
+       name = models.CharField(max_length=300)
+       num_awards = models.IntegerField()
+    class Book(models.Model):
+       isbn = models.CharField(max_length=9)
+       name = models.CharField(max_length=300)
+       pages = models.IntegerField()
+       price = models.FloatField()
+       authors = models.ManyToManyField(Author)
+       publisher = models.ForeignKey(Publisher)
+    class Store(models.Model):
+       name = models.CharField(max_length=300)
+       books = models.ManyToManyField(Book)
+aggregate(args, kwargs)
+-----------------------
+Returns a dictionary containing the calculations (aggregation) over
+the current queryset.
+    >>> Book.objects.aggregate(Avg('price'), highest_price=Max('price'))
+    {'price__avg': 45.045000000000002, 'highest_price': 82.799999999999997}
+You can also do aggregate lookups on related models.
+    >>> Author.objects.aggregate(Sum('book__price'))
+    {'book__price__sum': 442.44999999999999}
+It is important to notice that the previous query reads "The sum of
+the price of every book for every author". So if a book has many
+authors its price will be added as many times as authors the book
+has. If you are interested, instead, in "the sum of the price for
+all books" you would need to do a query like this::
+    >>> Book.objects.aggregate(Sum('price'))
+    {'price__sum': 270.26999999999998}
+.. note::
+   It is important to notice that aggregate() is a terminal
+   clause. This means that it does *not* return a queryset and no
+   other modifiers can be applied after it.
+annotate(args, kwargs)
+----------------------
+Returns a QuerySet extended with the results of the calculations on
+the given fields. So if you need to retrieve the "age for the oldest
+author of each book" you could do:
+    >>> books = Book.objects.annotate(Max('authors__age'))
+    >>> books[0].name
+    u'Python Web Development With Django'
+    >>> books[0].authors.all()
+    [<Author: Jeffrey Forcier >, <Author: Paul Bissex>, <Author: Wesley J. Chun>]
+    >>> books[0].authors__age__max
+    37.0
+And the output would be the model object extended with the aggregation
+information.
+grouping
+~~~~~~~~
+Sometimes you want to annotate, not on the whole set of objects but on
+those that share the same value for some fields. To do this, you
+apply values() before annotating. For example if you want to retrieve
+the age of the oldest author for the books of the same price you could do::
+    >>> books = Book.objects.values('price').annotate(oldest=Max('authors__age'))
+    >>> for book_group in books:
+    ...    print 'price', book_group['price'], 'oldest', book_group['oldest']
+    ...
+    price 29.69 oldest 37.0
+    price 75.0 oldest 57.0
+    price 82.8 oldest 57.0
+    price 23.09 oldest 45.0
+    price 30.0 oldest 35.0
+Note that applying values() after annotate() does not have the same
+effect. It reduces the output but no grouping is made:
+    >>> books = Book.objects.annotate(Max('authors__age')).values('price') #An entry for every Book
+    >>> for i in books:
+    ...    print 'price', i['price'], 'max', i['authors__age__max']
+    price 23.09 max 45.0
+    price 29.69 max 37.0
+    price 75.0 max 57.0
+    price 82.8 max 57.0
+    price 30.0 max 35.0
+    price 29.69 max 29.0
+    >>> len(Book.objects.annotate(Max('authors__age')).values('price')) #An entry for every Book
+    6
+    >>> len(Book.objects.values('price').annotate(Max('authors__age'))) #Books are grouped by price
+    5
+grouped_objects
+~~~~~~~~~~~~~~~
+Also, after doing an annotation, one might need to recover the
+elements that were grouped to do the calculation. To do this, the
+grouped_objects argument to annotate is provided. This argument, if
+True, changes the output format so the result is a list of tuples
+containing the values of the grouping and a queryset to retrieve the
+objects that were grouped.
+This changes the output in a way that for each result there's a tuple
+containing the result of the aggregation and a queryset to retrieve
+the objects that were grouped
+    >>> books = Book.objects.values('price').annotate(oldest=Max('authors__age'), grouped_objects=True)
+    >>> books[0]
+    ({'price': 29.690000000000001, 'oldest': 37.0},
+     [<Book: Practical Django Projects>, <Book: Python Web Development with Django>])
+.. note::
+   As normal querysets the queryset returned by ``grouped_objects`` is
+   lazy and will not be executed until it is evaluated. So if the
+   objects change before evaluating the queryset the aggregated result
+   might not hold.
+filtering
+~~~~~~~~~
+Another thing you might need is to retrieve only certain objects based
+on the result of a calculation. To do this the filtering syntax is
+used on the alias of the annotation. See filter(link) for more
+information on the lookups.
+There are four different types of filtering that you might be
+interested in. Each of these has a different representation.
+    * Simple filtering on the annotations
+          An example of this is retrieving the "Publishers that have more than one book"
+            >>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
+            [<Publisher: Apress >, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
+    * Simple filtering on the whole set
+          This is a normal filter, unrelated to aggregation. "Publishers that have books that cost less than 40"
+             >>> Publisher.objects.filter(book__price__lt=40).order_by('pk')
+             [<Publisher: Apress >, <Publisher: Apress >, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
+    * Annotating on the whole set and filtering on annotations
+          "Publishers that have more than one book and (at least) a book that cost less than 40"
+             >>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1, book__price__lt=40).order_by('pk')
+             [<Publisher: Apress >, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
+    * Filtering and annotating on the whole set
+          "Publishers that have more than one book that costs less than 40"
+             >>> Publisher.objects.filter(book__price__lt=40).annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
+             [<Publisher: Apress >]
+These different types of filtering exist because the
+filtering results vary depending on whether they are done in a join of
+two models or the single model. When you need the filtering to be done
+on the model and not the join of two models the filtering must be done
+before calling the annotation. If, on the contrary, the filtering
+should be done on the result of the joining it must be done after
+annotating.
+Aggregating on annotated values
+-------------------------------
+It is possible to apply ``aggregate()`` on the result of an annotation
+that does not group objects. Doing this will generate a subquery for
+the annotated objects and calculate the aggregation on top of it.
+This way, if you wanted to calculate the average number of authors per
+book you could do::
+    >>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Avg('num_authors'))
+    {'num_authors__avg': 1.66...}

Download in other formats:

Original Format

Issues

Context Navigation

Ticket #3566: aggregates.patch

django/db/aggregates.py

django/db/models/sql/query.py

django/db/models/sql/subqueries.py

django/db/models/manager.py

django/db/models/query.py

django/db/models/query_utils.py

django/db/backends/mysql/base.py

django/db/backends/init.py

tests/modeltests/aggregation/fixtures/initial_data.json

tests/modeltests/aggregation/models.py

tests/regressiontests/aggregation_regress/fixtures/initial_data.json

tests/regressiontests/aggregation_regress/models.py

tests/regressiontests/queries/models.py

AUTHORS

docs/aggregation.txt

Download in other formats:

Django Links

Learn More

Get Involved

Get Help

Follow Us

Support Us