Ticket #3566: aggregates.patch

File aggregates.patch, 64.5 KB (added by Nicolas Lara, 16 years ago)

Removed a development comment. Sorry for the noise.

  • django/db/aggregates.py

     
     1"""
     2Classes to represent the default aggregate functions
     3"""
     4
     5from django.db.models.sql.constants import LOOKUP_SEP
     6from django.core.exceptions import FieldError
     7
     8def interpolate(templateStr, **kws):
     9    from string import Template
     10    return Template(templateStr).substitute(kws)
     11
     12class Aggregate(object):
     13    """
     14    Default Aggregate.
     15    func
     16    """
     17    def __init__(self, lookup):
     18        self.func = self.__class__.__name__.upper()
     19        self.lookup = lookup
     20        self.field_name = self.lookup.split(LOOKUP_SEP)[-1]
     21        self.aliased_name = '%s__%s' % (self.lookup,
     22                                        self.__class__.__name__.lower())
     23        self.sql_template = '${func}(${field})'
     24
     25    def relabel_aliases(self, change_map):
     26        if self.col_alias in change_map:
     27            self.col_alias = change_map[self.col_alias]
     28
     29    def as_fold(self, quote_func=None):
     30        if self.lookup != self.field_name:
     31            raise FieldError('Joins are not allowed here.')
     32        #check to raise other exceptions
     33        return '%s(%s)' % (self.func, self.lookup)
     34
     35    def as_sql(self, quote_func=None):
     36        if not quote_func:
     37            quote_func = lambda x: x
     38        return interpolate(self.sql_template,
     39                           func=self.func.upper(),
     40                           field='.'.join([quote_func(self.col_alias),
     41                                           quote_func(self.column)]))
     42                           
     43class Max(Aggregate):
     44    pass
     45
     46class Min(Aggregate):
     47    pass
     48
     49class Avg(Aggregate):
     50    pass
     51
     52class Sum(Aggregate):
     53    pass
     54
     55class Count(Aggregate):
     56    def __init__(self, lookup, distinct=False):
     57        if distinct:
     58            distinct = 'DISTINCT '
     59        else:
     60            distinct = ''
     61        super(Count, self).__init__(lookup)
     62        self.sql_template = '${func}(%s${field})' % distinct
     63       
     64       
     65
  • django/db/models/sql/query.py

     
    1515from django.db import connection
    1616from django.db.models import signals
    1717from django.db.models.fields import FieldDoesNotExist
    18 from django.db.models.query_utils import select_related_descend
     18from django.db.models.query_utils import select_related_descend, _value_or_object
    1919from django.db.models.sql.where import WhereNode, EverythingNode, AND, OR
    2020from django.db.models.sql.datastructures import Count
    2121from django.core.exceptions import FieldError
     
    5757        self.start_meta = None
    5858        self.select_fields = []
    5959        self.related_select_fields = []
     60        self.allow_nulls = False
    6061        self.dupe_avoidance = {}
    6162        self.used_aliases = set()
    6263        self.filter_is_sticky = False
     
    167168        obj.standard_ordering = self.standard_ordering
    168169        obj.ordering_aliases = []
    169170        obj.start_meta = self.start_meta
     171        obj.allow_nulls = self.allow_nulls
    170172        obj.select_fields = self.select_fields[:]
    171173        obj.related_select_fields = self.related_select_fields[:]
    172174        obj.dupe_avoidance = self.dupe_avoidance.copy()
     
    217219                    row = self.resolve_columns(row, fields)
    218220                yield row
    219221
    def get_aggregation(self):
        """
        Returns a dictionary mapping the aliased_name of every aggregate
        to its computed value (passed through _value_or_object).

        Returns an empty dictionary if nothing is selected. If the query
        produces no row, every aggregate maps to None.

        Without a GROUP BY clause the query itself is reduced to its
        aggregates and executed; this rewrites self.select and
        self.extra_select.
        """
        if not self.select:
            return {}

        if self.group_by:
            # If there is a group by clause aggregating does not add useful
            # information but retrieves only the first row. Aggregating
            # over the subquery instead: the aggregates flagged with
            # 'reduce' are moved to an outer query and the rest of the
            # select list stays in the (cloned) inner query.
            from subqueries import AggregateQuery
            obj = self.clone()
            external = []
            internal = []
            for field in obj.select:
                if getattr(field, 'reduce', False):
                    external.append(field)
                else:
                    internal.append(field)
            obj.select = internal
            query = AggregateQuery(self.model, self.connection)
            query.add_select(external)
            query.add_subquery(obj)
        else:
            self.select = self.get_aggregate_list()
            self.extra_select = {}
            query = self

        row = query.execute_sql(SINGLE)
        if row is None:
            # NOTE(review): execute_sql() is not visible here; it is
            # expected to hand back None instead of a row when the query
            # yields no result. Report None for every aggregate rather
            # than failing while iterating over None.
            row = [None] * len(query.select)

        names = [i.aliased_name for i in query.select]
        values = [_value_or_object(x) for x in row]
        return dict(zip(names, values))
     258
    220259    def get_count(self):
    221260        """
    222261        Performs a COUNT() query using the current filter constraints.
     
    289328            result.append(' AND '.join(self.extra_where))
    290329
    291330        if self.group_by:
    292             grouping = self.get_grouping()
    293             result.append('GROUP BY %s' % ', '.join(grouping))
     331            result.append('GROUP BY %s' % ', '.join(self.get_grouping()))
    294332
     333        having = []
     334        having_params = []
    295335        if self.having:
    296             having, h_params = self.get_having()
    297             result.append('HAVING %s' % ', '.join(having))
    298             params.extend(h_params)
     336            qn = self.quote_name_unless_alias
     337            for (aggregate, lookup_type, value) in self.having:
     338                if lookup_type == 'in':
     339                    having.append('%s IN (%s)' % (aggregate.as_sql(quote_func=qn),
     340                                                  ', '.join(['%s'] * len(value))))
     341                    having_params.extend(value)
     342                elif lookup_type == 'range':
     343                    having.append('%s BETWEEN %%s and %%s' %
     344                                  aggregate.as_sql(quote_func=qn))
     345                    having_params.extend([value[0], value[1]])
     346                elif lookup_type == 'isnull':
     347                    having.append('%s IS %sNULL' % (aggregate.as_sql(quote_func=qn),
     348                                                    (not value and "NOT " or '')))
     349                else:
     350                    if lookup_type not in connection.operators:
     351                        raise TypeError('Invalid lookup_type: %r' % lookup_type)
     352                       
     353                    having.append('%s %s' % (aggregate.as_sql(quote_func=qn),
     354                                             connection.operators[lookup_type]))
     355                    having_params.append(value)
     356                   
     357            having_clause = 'HAVING ' + ' AND '.join(having)
     358            result.append(having_clause)
    299359
    300360        if ordering:
    301361            result.append('ORDER BY %s' % ', '.join(ordering))
     
    311371                result.append('OFFSET %d' % self.low_mark)
    312372
    313373        params.extend(self.extra_params)
     374        params.extend(having_params)
    314375        return ' '.join(result), tuple(params)
    315376
    316377    def combine(self, rhs, connector):
     
    413474            self.join((None, self.model._meta.db_table, None, None))
    414475        if self.select_related and not self.related_select_cols:
    415476            self.fill_related_selections()
     477        if self.allow_nulls:
     478            self.promote_all()
    416479
    417480    def get_columns(self, with_aliases=False):
    418481        """
     
    436499            for col in self.select:
    437500                if isinstance(col, (list, tuple)):
    438501                    r = '%s.%s' % (qn(col[0]), qn(col[1]))
    439                     if with_aliases and col[1] in col_aliases:
    440                         c_alias = 'Col%d' % len(col_aliases)
    441                         result.append('%s AS %s' % (r, c_alias))
    442                         aliases.add(c_alias)
    443                         col_aliases.add(c_alias)
     502                    if with_aliases:
     503                        if col[1] in col_aliases:
     504                            c_alias = 'Col%d' % len(col_aliases)
     505                            result.append('%s AS %s' % (r, c_alias))
     506                            aliases.add(c_alias)
     507                            col_aliases.add(c_alias)
     508                        else:
     509                            result.append('%s AS %s' % (r, col[1]))
     510                            aliases.add(r)
     511                            col_aliases.add(col[1])
    444512                    else:
    445513                        result.append(r)
    446514                        aliases.add(r)
    447515                        col_aliases.add(col[1])
    448516                else:
    449                     result.append(col.as_sql(quote_func=qn))
     517                    if hasattr(col, 'aliased_name'):
     518                        result.append('%s AS %s' % (col.as_sql(quote_func=qn),
     519                                                    col.aliased_name))
     520                    else:
     521                        result.append(col.as_sql(quote_func=qn))
     522
    450523                    if hasattr(col, 'alias'):
    451524                        aliases.add(col.alias)
    452525                        col_aliases.add(col.alias)
     526                       
    453527        elif self.default_cols:
    454528            cols, new_aliases = self.get_default_columns(with_aliases,
    455529                    col_aliases)
     
    621695            asc, desc = ORDER_DIR['ASC']
    622696        else:
    623697            asc, desc = ORDER_DIR['DESC']
     698
    624699        for field in ordering:
     700            found = False
     701            for aggregate in self.get_aggregate_list():
     702                if aggregate.aliased_name in field:
     703                    if field[0] == '-':
     704                        order = desc
     705                    else:
     706                        order = asc
     707                    result.append('%s %s' % (aggregate.as_sql(), order))
     708                    found = True
     709            if found:
     710                continue
    625711            if field == '?':
    626712                result.append(self.connection.ops.random_function_sql())
    627713                continue
     
    747833        """ Decreases the reference count for this alias. """
    748834        self.alias_refcount[alias] -= 1
    749835
    def promote_all(self):
        """ Calls promote_alias unconditionally for every alias in alias_map """
        promote = self.promote_alias
        for alias in self.alias_map:
            promote(alias, unconditional=True)
     840       
    750841    def promote_alias(self, alias, unconditional=False):
    751842        """
    752843        Promotes the join type of an alias to an outer join if it's possible
     
    884975            alias = self.join((None, self.model._meta.db_table, None, None))
    885976        return alias
    886977
    def is_aggregate(self, obj):
        """ Returns True if obj is an instance of Aggregate. """
        from django.db import aggregates
        return isinstance(obj, aggregates.Aggregate)
     981
    def get_aggregate_list(self, attribute=None):
        """
        Returns the Aggregate instances found in self.select, in order.

        If attribute is given, the value of that attribute on each
        aggregate is returned instead of the aggregate itself.
        """
        from django.db.aggregates import Aggregate
        found = [x for x in self.select if isinstance(x, Aggregate)]
        if attribute:
            return [getattr(x, attribute) for x in found]
        return found
     989           
    887990    def count_active_tables(self):
    888991        """
    889992        Returns the number of tables in this query with a non-zero reference
     
    10611164            self.fill_related_selections(f.rel.to._meta, alias, cur_depth + 1,
    10621165                    used, next, restricted, new_nullable, dupe_set, avoid)
    10631166
    def add_aggregate(self, aggregate_expr, model):
        """
        Appends aggregate_expr to the select list of this query.

        Unless the expression is a 'reduce' aggregate on a grouped query,
        its lookup is resolved first and the resulting table alias and
        column name are stored on the expression (col_alias, column).
        """
        # Do not waste time in checking the joins if it's an aggregate
        # on an annotation
        if self.group_by and aggregate_expr.reduce:
            self.select.append(aggregate_expr)
            return

        opts = model._meta
        parts = aggregate_expr.lookup.split(LOOKUP_SEP)

        # A single-component lookup naming a field of the model itself
        # needs no join.
        local_fields = None
        if len(parts) <= 1:
            candidates = dict([(f.name, f) for f in opts.fields])
            if parts[0] in candidates:
                local_fields = candidates

        if local_fields is None:
            _field, target, _opts, join_list, _last, _extra = self.setup_joins(
                parts, opts, self.get_initial_alias(), False)

            self.allow_nulls = True
            aggregate_expr.column = target.column
            aggregate_expr.col_alias = join_list[-1]
        else:
            aggregate_expr.col_alias = opts.db_table
            aggregate_expr.column = local_fields[parts[0]].column

        self.select.append(aggregate_expr)
     1199
    10641200    def add_filter(self, filter_expr, connector=AND, negate=False, trim=False,
    10651201            can_reuse=None, process_extras=True):
    10661202        """
     
    11161252        alias = self.get_initial_alias()
    11171253        allow_many = trim or not negate
    11181254
     1255        for i in self.get_aggregate_list():
     1256            if i.aliased_name == parts[0] :
     1257                self.having.append((i, lookup_type, value))
     1258                return
     1259       
    11191260        try:
    11201261            field, target, opts, join_list, last, extra_filters = self.setup_joins(
    11211262                    parts, opts, alias, True, allow_many, can_reuse=can_reuse,
     
    15141655        """
    15151656        return not (self.low_mark or self.high_mark)
    15161657
    1517     def add_fields(self, field_names, allow_m2m=True):
     1658    def add_fields(self, field_names, allow_m2m=True, rebuild=False):
    15181659        """
    15191660        Adds the given (model) fields to the select set. The field names are
    15201661        added in the order specified.
     1662
     1663        If rebuild is True, the field list is rebuilt from scratch,
     1664        keeping only the aggregate objects.
    15211665        """
    15221666        alias = self.get_initial_alias()
    15231667        opts = self.get_meta()
     1668
     1669        aggregates = []
     1670        if rebuild:
     1671            aggregates = self.get_aggregate_list()
     1672            self.select = []
     1673           
    15241674        try:
    15251675            for name in field_names:
    15261676                field, target, u2, joins, u3, u4 = self.setup_joins(
     
    15451695            names.sort()
    15461696            raise FieldError("Cannot resolve keyword %r into field. "
    15471697                    "Choices are: %s" % (name, ", ".join(names)))
     1698        self.select.extend(aggregates)
    15481699
    15491700    def add_ordering(self, *ordering):
    15501701        """
     
    15761727        if force_empty:
    15771728            self.default_ordering = False
    15781729
    def set_group_by(self):
        """
        Extends group_by from the current select list.

        On backends whose features set allows_group_by_pk, and when the
        select list is as long as the model's field list, only the
        '<db_table>.<pk column>' string is added. Otherwise every
        non-aggregate entry of the select list is added.
        """
        by_pk = self.connection.features.allows_group_by_pk
        if by_pk and len(self.select) == len(self.model._meta.fields):
            # there might be problems with the aliases here. check.
            meta = self.model._meta
            self.group_by.append('.'.join([meta.db_table, meta.pk.column]))
            return

        plain = [sel for sel in self.select if not self.is_aggregate(sel)]
        self.group_by.extend(plain)
     1741
    15791742    def add_count_column(self):
    15801743        """
    15811744        Converts the query to do count(...) or count(distinct(pk)) in order to
  • django/db/models/sql/subqueries.py

     
    409409
    410410    def get_ordering(self):
    411411        return ()
     412
     413class AggregateQuery(Query):
     414    """
     415    An AggregateQuery takes another query as a parameter to the FROM
     416    clause and only selects the elements in the provided list.
     417    """
     418    #CK Clean this
     419    def add_select(self, select):
     420        self.select = select
     421
     422    def add_subquery(self, query):
     423        self.subquery, self.sub_params = query.as_sql(with_col_aliases=True)
     424
     425    def as_sql(self, quote_func=None):
     426        """
     427        Creates the SQL for this query. Returns the SQL string and list of
     428        parameters.
     429        """
     430        sql = ('SELECT %s FROM (%s) AS subquery' %
     431               (', '.join([i.as_fold() for i in self.select]), self.subquery))
     432        params = self.sub_params
     433        return (sql, params)
     434               
  • django/db/models/manager.py

     
    101101    def filter(self, *args, **kwargs):
    102102        return self.get_query_set().filter(*args, **kwargs)
    103103
    def aggregate(self, *args, **kwargs):
        # Delegates to aggregate() on this manager's query set.
        query_set = self.get_query_set()
        return query_set.aggregate(*args, **kwargs)
     106
    def annotate(self, *args, **kwargs):
        # Delegates to annotate() on this manager's query set.
        query_set = self.get_query_set()
        return query_set.annotate(*args, **kwargs)
     109
    104110    def complex_filter(self, *args, **kwargs):
    105111        return self.get_query_set().complex_filter(*args, **kwargs)
    106112
  • django/db/models/query.py

     
    44    from sets import Set as set     # Python 2.3 fallback
    55
    66from django.db import connection, transaction, IntegrityError
     7from django.db.aggregates import Aggregate
    78from django.db.models.fields import DateField
    8 from django.db.models.query_utils import Q, select_related_descend
     9from django.db.models.query_utils import Q, select_related_descend, _value_or_object
    910from django.db.models import signals, sql
    1011from django.utils.datastructures import SortedDict
    1112
     
    266267        max_depth = self.query.max_depth
    267268        extra_select = self.query.extra_select.keys()
    268269        index_start = len(extra_select)
    269         for row in self.query.results_iter():
     270
     271        for row in self.query.results_iter(): 
    270272            if fill_cache:
    271                 obj, _ = get_cached_row(self.model, row, index_start,
    272                         max_depth, requested=requested)
     273                obj, aggregate_start = get_cached_row(self.model, row,
     274                                    index_start, max_depth, requested=requested)
    273275            else:
    274                 obj = self.model(*row[index_start:])
     276                aggregate_start = index_start + len(self.model._meta.fields)
     277                #omit aggregates in object creation
     278                obj = self.model(*row[index_start:aggregate_start])
     279               
    275280            for i, k in enumerate(extra_select):
    276281                setattr(obj, k, row[i])
     282                       
     283            data_length = len(row)
     284            if aggregate_start < data_length:
     285                #the aggregate values retrieved from the backend
     286                aggregate_values = [_value_or_object(row[i])
     287                                    for i in range(aggregate_start, data_length)]
     288
     289                select =  self.query.extra_select.keys() + self.query.select
     290                #Add the attributes to the model
     291                new_values = dict(zip(
     292                    [select[i].aliased_name
     293                     for i in range(aggregate_start, len(select))],
     294                    aggregate_values))
     295               
     296                obj.__dict__.update(new_values)
     297
    277298            yield obj
    278299
    def aggregate(self, *args, **kwargs):
        """
        Returns a dictionary containing the calculations (aggregation)
        over the current queryset.

        If args is present the expression is passed as a kwarg using
        the Aggregate object's default alias.

        Each expression gets its aliased_name set to the key it is
        returned under and is flagged with reduce = True.
        """
        for arg in args:
            kwargs[arg.aliased_name] = arg

        # Work on a copy of the query: adding the aggregates and computing
        # them rewrites the query's select list, which must not leak into
        # this queryset.
        query = self.query.clone()

        for (alias, aggregate_expr) in kwargs.items():
            aggregate_expr.aliased_name = alias
            aggregate_expr.reduce = True
            query.add_aggregate(aggregate_expr, self.model)

        return query.get_aggregation()
     317
    279318    def count(self):
    280319        """
    281320        Performs a SELECT COUNT() and returns the number of records as an
     
    547586        """
    548587        self.query.select_related = other.query.select_related
    549588
    def annotate(self, *args, **kwargs):
        """
        Returns a clone of this queryset with the given aggregate
        expressions added to its query.

        Positional expressions are keyed by their default aliased_name;
        keyword expressions are keyed by the keyword. Each expression is
        flagged with reduce = False.

        The optional 'grouped_objects' keyword is not an aggregate: its
        value (None when absent) is handed to _clone() as return_groups.
        This queryset itself is left untouched.
        """
        # Take the option out of kwargs so it is not treated as an
        # aggregate expression below.
        return_groups = kwargs.pop('grouped_objects', None)

        for arg in args:
            kwargs[arg.aliased_name] = arg

        opts = self.model._meta
        obj = self._clone(return_groups=return_groups)

        if isinstance(obj, ValuesQuerySet):
            obj.query.set_group_by()

        if not obj.query.group_by:
            # Nothing to group by yet: select every field of the model
            # and group by the resulting select list.
            field_names = [f.attname for f in opts.fields]
            obj.query.add_fields(field_names, False)
            obj.query.set_group_by()

        for (alias, aggregate_expr) in kwargs.items():
            aggregate_expr.aliased_name = alias
            aggregate_expr.reduce = False
            obj.query.add_aggregate(aggregate_expr, self.model)

        return obj
     617
    550618    def order_by(self, *field_names):
    551619        """
    552620        Returns a new QuerySet instance with the ordering changed.
     
    635703        """
    636704        pass
    637705
    638 
    639706class ValuesQuerySet(QuerySet):
    640707    def __init__(self, *args, **kwargs):
    641708        super(ValuesQuerySet, self).__init__(*args, **kwargs)
     
    650717            len(self.field_names) != len(self.model._meta.fields)):
    651718            self.query.trim_extra_select(self.extra_names)
    652719        names = self.query.extra_select.keys() + self.field_names
     720        names.extend([x.aliased_name for x in self.query.select
     721                      if isinstance(x, Aggregate)])
     722        aggregate_start = len(self._fields) or len(self.model._meta.fields)
     723
    653724        for row in self.query.results_iter():
    654             yield dict(zip(names, row))
     725            normalized_row = list(row)
     726            for i in range(aggregate_start, len(normalized_row)):
     727                normalized_row[i] = _value_or_object(normalized_row[i])
    655728
     729            num_fields = len(self.model._meta.fields)
     730            has_grouping = (len(row) > aggregate_start and
     731                            len(self.field_names) < num_fields and
     732                            len(self.query.group_by) < num_fields)
     733           
     734            #Grouped objects QuerySet
     735            if (hasattr(self, 'return_groups') and self.return_groups):
     736                restrictions = dict(zip(names, normalized_row[:aggregate_start]))
     737                group_query = self.model.objects.filter(**restrictions)           
     738                yield (dict(zip(names, normalized_row)), group_query)
     739            else:
     740                yield dict(zip(names, normalized_row))
     741
    656742    def _setup_query(self):
    657743        """
    658744        Constructs the field_names list that the values query will be
     
    660746
    661747        Called by the _clone() method after initializing the rest of the
    662748        instance.
    663         """
     749        """       
    664750        self.extra_names = []
    665751        if self._fields:
    666752            if not self.query.extra_select:
     
    676762            # Default to all fields.
    677763            field_names = [f.attname for f in self.model._meta.fields]
    678764
    679         self.query.add_fields(field_names, False)
     765        self.query.add_fields(field_names, False, rebuild=True)
    680766        self.query.default_cols = False
    681767        self.field_names = field_names
    682768
  • django/db/models/query_utils.py

     
    6565        return False
    6666    return True
    6767
     68def _value_or_object(obj):
     69    try:
     70        return float(obj)
     71    except:
     72        return obj
  • django/db/backends/mysql/base.py

     
    110110class DatabaseFeatures(BaseDatabaseFeatures):
    111111    empty_fetchmany_value = ()
    112112    update_can_self_select = False
     113    allows_group_by_pk = True
    113114    related_fields_match_type = True
    114115
    115116class DatabaseOperations(BaseDatabaseOperations):
  • django/db/backends/__init__.py

     
    6262        return util.CursorDebugWrapper(cursor, self)
    6363
    6464class BaseDatabaseFeatures(object):
     65    allows_group_by_pk = False
    6566    # True if django.db.backend.utils.typecast_timestamp is used on values
    6667    # returned from dates() calls.
    6768    needs_datetime_string_cast = True
  • tests/modeltests/aggregation/fixtures/initial_data.json

     
     1[
     2 {
     3  "pk": 1,
     4  "model": "aggregation.publisher",
     5  "fields": {
     6   "name": "Apress ",
     7   "num_awards": 3
     8  }
     9 },
     10 {
     11  "pk": 2,
     12  "model": "aggregation.publisher",
     13  "fields": {
     14   "name": "Sams",
     15   "num_awards": 1
     16  }
     17 },
     18 {
     19  "pk": 3,
     20  "model": "aggregation.publisher",
     21  "fields": {
     22   "name": "Prentice Hall",
     23   "num_awards": 7
     24  }
     25 },
     26 {
     27  "pk": 4,
     28  "model": "aggregation.publisher",
     29  "fields": {
     30   "name": "Morgan Kaufmann",
     31   "num_awards": 9
     32  }
     33 },
     34 {
     35  "pk": 1,
     36  "model": "aggregation.book",
     37  "fields": {
     38   "publisher": 1,
     39   "isbn": "159059725",
     40   "name": "The Definitive Guide to Django: Web Development Done Right",
     41   "price": 30.0,
     42   "authors": [
     43    1,
     44    2
     45   ],
     46   "pages": 447
     47  }
     48 },
     49 {
     50  "pk": 2,
     51  "model": "aggregation.book",
     52  "fields": {
     53   "publisher": 2,
     54   "isbn": "067232959",
     55   "name": "Sams Teach Yourself Django in 24 Hours",
     56   "price": 23.09,
     57   "authors": [
     58    3
     59   ],
     60   "pages": 528
     61  }
     62 },
     63 {
     64  "pk": 3,
     65  "model": "aggregation.book",
     66  "fields": {
     67   "publisher": 1,
     68   "isbn": "159059996",
     69   "name": "Practical Django Projects",
     70   "price": 29.69,
     71   "authors": [
     72    4
     73   ],
     74   "pages": 300
     75  }
     76 },
     77 {
     78  "pk": 4,
     79  "model": "aggregation.book",
     80  "fields": {
     81   "publisher": 3,
     82   "isbn": "013235613",
     83   "name": "Python Web Development with Django",
     84   "price": 29.69,
     85   "authors": [
     86    5,
     87    6,
     88    7
     89   ],
     90   "pages": 350
     91  }
     92 },
     93 {
     94  "pk": 5,
     95  "model": "aggregation.book",
     96  "fields": {
     97   "publisher": 3,
     98   "isbn": "013790395",
     99   "name": "Artificial Intelligence: A Modern Approach",
     100   "price": 82.8,
     101   "authors": [
     102    8,
     103    9
     104   ],
     105   "pages": 1132
     106  }
     107 },
     108 {
     109  "pk": 6,
     110  "model": "aggregation.book",
     111  "fields": {
     112   "publisher": 4,
     113   "isbn": "155860191",
     114   "name": "Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp",
     115   "price": 75.0,
     116   "authors": [
     117    8
     118   ],
     119   "pages": 946
     120  }
     121 },
     122 {
     123  "pk": 1,
     124  "model": "aggregation.store",
     125  "fields": {
     126   "books": [
     127    1,
     128    2,
     129    3,
     130    4,
     131    5,
     132    6
     133   ],
     134   "name": "Amazon.com"
     135  }
     136 },
     137 {
     138  "pk": 2,
     139  "model": "aggregation.store",
     140  "fields": {
     141   "books": [
     142    1,
     143    3,
     144    5,
     145    6
     146   ],
     147   "name": "Books.com"
     148  }
     149 },
     150 {
     151  "pk": 3,
     152  "model": "aggregation.store",
     153  "fields": {
     154   "books": [
     155    3,
     156    4,
     157    6
     158   ],
     159   "name": "Mamma and Pappa's Books"
     160  }
     161 },
     162 {
     163  "pk": 1,
     164  "model": "aggregation.author",
     165  "fields": {
     166   "age": 34,
     167   "friends": [
     168    2,
     169    4
     170   ],
     171   "name": "Adrian Holovaty"
     172  }
     173 },
     174 {
     175  "pk": 2,
     176  "model": "aggregation.author",
     177  "fields": {
     178   "age": 35,
     179   "friends": [
     180    1,
     181    7
     182   ],
     183   "name": "Jacob Kaplan-Moss"
     184  }
     185 },
     186 {
     187  "pk": 3,
     188  "model": "aggregation.author",
     189  "fields": {
     190   "age": 45,
     191   "friends": [],
     192   "name": "Brad Dayley"
     193  }
     194 },
     195 {
     196  "pk": 4,
     197  "model": "aggregation.author",
     198  "fields": {
     199   "age": 29,
     200   "friends": [
     201    1
     202   ],
     203   "name": "James Bennett"
     204  }
     205 },
     206 {
     207  "pk": 5,
     208  "model": "aggregation.author",
     209  "fields": {
     210   "age": 37,
     211   "friends": [
     212    6,
     213    7
     214   ],
     215   "name": "Jeffrey Forcier "
     216  }
     217 },
     218 {
     219  "pk": 6,
     220  "model": "aggregation.author",
     221  "fields": {
     222   "age": 29,
     223   "friends": [
     224    5,
     225    7
     226   ],
     227   "name": "Paul Bissex"
     228  }
     229 },
     230 {
     231  "pk": 7,
     232  "model": "aggregation.author",
     233  "fields": {
     234   "age": 25,
     235   "friends": [
     236    2,
     237    5,
     238    6
     239   ],
     240   "name": "Wesley J. Chun"
     241  }
     242 },
     243 {
     244  "pk": 8,
     245  "model": "aggregation.author",
     246  "fields": {
     247   "age": 57,
     248   "friends": [
     249    9
     250   ],
     251   "name": "Peter Norvig"
     252  }
     253 },
     254 {
     255  "pk": 9,
     256  "model": "aggregation.author",
     257  "fields": {
     258   "age": 46,
     259   "friends": [
     260    8
     261   ],
     262   "name": "Stuart Russell"
     263  }
     264 }
     265]
  • tests/modeltests/aggregation/models.py

     
     1# coding: utf-8
     2from django.db import models
     3
     4class Author(models.Model):
     5   name = models.CharField(max_length=100)
     6   age = models.IntegerField()
     7   friends = models.ManyToManyField('self', blank=True)
     8
     9   def __unicode__(self):
     10      return self.name
     11
     12class Publisher(models.Model):
     13   name = models.CharField(max_length=300)
     14   num_awards = models.IntegerField()
     15   
     16   def __unicode__(self):
     17      return self.name
     18
     19class Book(models.Model):
     20   isbn = models.CharField(max_length=9)
     21   name = models.CharField(max_length=300)
     22   pages = models.IntegerField()
     23   price = models.FloatField()
     24   authors = models.ManyToManyField(Author)
     25   publisher = models.ForeignKey(Publisher)
     26   
     27   def __unicode__(self):
     28      return self.name
     29
     30class Store(models.Model):
     31   name = models.CharField(max_length=300)
     32   books = models.ManyToManyField(Book)
     33   
     34   def __unicode__(self):
     35      return self.name
     36
     37class Entries(models.Model):
     38   EntryID = models.AutoField(primary_key=True, db_column='Entry ID')
     39   Entry = models.CharField(unique=True, max_length=50)
     40   Exclude = models.BooleanField()
     41
     42class Clues(models.Model):
     43   ID = models.AutoField(primary_key=True)
     44   EntryID = models.ForeignKey(Entries, verbose_name='Entry', db_column = 'Entry ID')
     45   Clue = models.CharField(max_length=150)
     46
     47# Tests on 'aggregate'
     48# Different backends and numbers.
     49__test__ = {'API_TESTS': """
     50>>> from django.core import management
     51
     52# Reset the database representation of this app.
     53# This will return the database to a clean initial state.
     54>>> management.call_command('flush', verbosity=0, interactive=False)
     55
     56# Empty Call
     57>>> Author.objects.all().aggregate()
     58{}
     59
     60>>> from django.db.aggregates import Avg, Sum, Count, Max, Min
     61
     62# Note that rounding of floating points is being used for the tests to
     63# pass for all backends
     64
     65# Single model aggregation
     66#
     67
     68# Simple
     69# Average Author age
     70>>> Author.objects.all().aggregate(Avg('age'))
     71{'age__avg': 37.4...}
     72
     73# Multiple
     74# Average and Sum of Author's age
     75>>> Author.objects.all().aggregate(Sum('age'), Avg('age'))
     76{'age__sum': 337.0, 'age__avg': 37.4...}
     77
     78# After applying other modifiers
     79# Sum of the age of those older than 29 years old
     80>>> Author.objects.all().filter(age__gt=29).aggregate(Sum('age'))
     81{'age__sum': 254.0}
     82
     83# Depth-1 Joins
     84#
     85
     86# On Relationships with self
     87# Average age of those with friends (not exactly.
     88# That would be: Author.objects.all().exclude(friends=None).aggregate(Avg('age')))
     89>>> Author.objects.all().aggregate(Avg('friends__age'))
     90{'friends__age__avg': 34.07...}
     91
     92# On ManyToMany Relationships
     93#
     94
     95# Forward
     96# Average age of the Authors of Books that cost less than 50 USD
     97>>> Book.objects.all().filter(price__lt=50).aggregate(Avg('authors__age'))
     98{'authors__age__avg': 33.42...}
     99
     100
     101# Backward
     102# Average price of the Books whose Author's name contains the letter 'a'
     103>>> Author.objects.all().filter(name__contains='a').aggregate(Avg('book__price'))
     104{'book__price__avg': 37.54...}
     105
     106# On OneToMany Relationships
     107#
     108
     109# Forward
     110# Sum of the number of awards of each Book's Publisher
     111>>> Book.objects.all().aggregate(Sum('publisher__num_awards'))
     112{'publisher__num_awards__sum': 30.0}
     113
     114# Backward
     115# Sum of the price of every Book that has a Publisher
     116>>> Publisher.objects.all().aggregate(Sum('book__price'))
     117{'book__price__sum': 270.269...}
     118
     119# Multiple Joins
     120#
     121
     122#Forward
     123>>> Store.objects.all().aggregate(Max('books__authors__age'))
     124{'books__authors__age__max': 57.0}
     125
     126#Backward
     127>>> Author.objects.all().aggregate(Min('book__publisher__num_awards'))
     128{'book__publisher__num_awards__min': 1.0}
     129
     130# You can also use aliases.
     131#
     132
     133# Average amazon.com Book price
     134>>> Store.objects.filter(name='Amazon.com').aggregate(amazon_mean=Avg('books__price'))
     135{'amazon_mean': 45.04...}
     136
     137# Tests on annotate()
     138#
     139
     140# An empty annotate call does nothing but return the same QuerySet
     141>>> Book.objects.all().annotate().order_by('pk')
     142[<Book: The Definitive Guide to Django: Web Development Done Right>, <Book: Sams Teach Yourself Django in 24 Hours>, <Book: Practical Django Projects>, <Book: Python Web Development with Django>, <Book: Artificial Intelligence: A Modern Approach>, <Book: Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp>]
     143
     144#Annotate inserts the alias into the model object with the aggregated result
     145>>> books = Book.objects.all().annotate(mean_age=Avg('authors__age'))
     146>>> books.get(pk=1).name
     147u'The Definitive Guide to Django: Web Development Done Right'
     148
     149>>> books.get(pk=1).mean_age
     15034.5
     151
     152#Calls to values() are not commutative over annotate().
     153
     154#Calling values on a queryset that has annotations returns the output
     155#as a dictionary
     156>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values()
     157[{'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'price': 30.0, 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}]
     158
     159#Calling it with parameters reduces the output but does not remove the
     160#annotation.
     161>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values('name')
     162[{'name': u'The Definitive Guide to Django: Web Development Done Right', 'mean_age': 34.5}]
     163
     164#An empty values() call before annotating has the same effect as an
     165#empty values() call after annotating
     166>>> Book.objects.filter(pk=1).values().annotate(mean_age=Avg('authors__age'))
     167[{'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'price': 30.0, 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}]
     168
     169#Calling annotate() on a ValuesQuerySet annotates over the groups of
     170#fields to be selected by the ValuesQuerySet.
     171
     172#Note that an extra parameter is added to each dictionary. This
     173#parameter is a queryset representing the objects that have been
     174#grouped to generate the annotation
     175
     176>>> Book.objects.all().values('price').annotate(number=Count('authors__id'), mean_age=Avg('authors__age')).order_by('price')
     177[{'price': 23.09, 'number': 1.0, 'mean_age': 45.0}, {'price': 29.690000000000001, 'number': 4.0, 'mean_age': 30.0}, {'price': 30.0, 'number': 2.0, 'mean_age': 34.5}, {'price': 75.0, 'number': 1.0, 'mean_age': 57.0}, {'price': 82.799999999999997, 'number': 2.0, 'mean_age': 51.5}]
     178
     179
     180#Notice that the output includes all Authors but the value of the aggregation
     181#is None for those that have no friends.
     182#(consider having a neutral ('zero') element for each operation)
     183>>> authors = Author.objects.all().annotate(Avg('friends__age')).order_by('id')
     184>>> len(authors)
     1859
     186>>> for i in authors:
     187...     print i.name, i.friends__age__avg
     188...
     189Adrian Holovaty 32.0
     190Jacob Kaplan-Moss 29.5
     191Brad Dayley None
     192James Bennett 34.0
     193Jeffrey Forcier  27.0
     194Paul Bissex 31.0
     195Wesley J. Chun 33.66...
     196Peter Norvig 46.0
     197Stuart Russell 57.0
     198
     199#The Count aggregation function allows an extra parameter: distinct.
     200#
     201>>> Book.objects.all().aggregate(Count('price'))
     202{'price__count': 6.0}
     203
     204>>> Book.objects.all().aggregate(Count('price', distinct=True))
     205{'price__count': 5.0}
     206
     207#Retrieving the grouped objects
     208
     209
     210#When using Count you can also omit the primary key and refer only to
     211#the related field name if you want to count all the related objects
     212#and not a specific column
     213>>> explicit = list(Author.objects.annotate(Count('book__id')))
     214>>> implicit = list(Author.objects.annotate(Count('book')))
     215>>> explicit == implicit
     216True
     217
     218##
     219# Ordering is allowed on aggregates
     220>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('oldest')
     221[{'price': 30.0, 'oldest': 35.0}, {'price': 29.6..., 'oldest': 37.0}, {'price': 23.09, 'oldest': 45.0}, {'price': 75.0, 'oldest': 57.0}, {'price': 82.7..., 'oldest': 57.0}]
     222
     223>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('-oldest')
     224[{'price': 75.0, 'oldest': 57.0}, {'price': 82.7..., 'oldest': 57.0}, {'price': 23.09, 'oldest': 45.0}, {'price': 29.6..., 'oldest': 37.0}, {'price': 30.0, 'oldest': 35.0}]
     225
     226>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('-oldest', 'price')
     227[{'price': 75.0, 'oldest': 57.0}, {'price': 82.7..., 'oldest': 57.0}, {'price': 23.09, 'oldest': 45.0}, {'price': 29.6..., 'oldest': 37.0}, {'price': 30.0, 'oldest': 35.0}]
     228
     229>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('-oldest', '-price')
     230[{'price': 82.7..., 'oldest': 57.0}, {'price': 75.0, 'oldest': 57.0}, {'price': 23.09, 'oldest': 45.0}, {'price': 29.6..., 'oldest': 37.0}, {'price': 30.0, 'oldest': 35.0}]
     231
     232# It is possible to aggregate over annotated values
     233#
     234>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Avg('num_authors'))
     235{'num_authors__avg': 1.66...}
     236
     237# You can filter the results based on the aggregation alias.
     238#
     239
     240#Lets add a publisher to test the different possibilities for filtering
     241>>> p = Publisher(name='Expensive Publisher', num_awards=0)
     242>>> p.save()
     243>>> Book(name='ExpensiveBook1', pages=1, isbn='111', price=1000, publisher=p).save()
     244>>> Book(name='ExpensiveBook2', pages=1, isbn='222', price=1000, publisher=p).save()
     245>>> Book(name='ExpensiveBook3', pages=1, isbn='333', price=35, publisher=p).save()
     246
     247#Consider the following queries:
     248
     249#Publishers that have:
     250
     251#(i) more than one book
     252>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
     253[<Publisher: Apress >, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
     254
     255#(ii) a book that cost less than 40
     256>>> Publisher.objects.filter(book__price__lt=40).order_by('pk')
     257[<Publisher: Apress >, <Publisher: Apress >, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
     258
     259#(iii) more than one book and (at least) a book that cost less than 40
     260>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1, book__price__lt=40).order_by('pk')
     261[<Publisher: Apress >, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
     262
     263#(iv) more than one book that costs less than 40
     264>>> Publisher.objects.filter(book__price__lt=40).annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
     265[<Publisher: Apress >]
     266
     267# Now a bit of testing on the different lookup types
     268#
     269
     270>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 3]).order_by('pk')
     271[<Publisher: Apress >, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Morgan Kaufmann>, <Publisher: Expensive Publisher>]
     272
     273>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 2]).order_by('pk')
     274[<Publisher: Apress >, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Morgan Kaufmann>]
     275
     276>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__in=[1, 3]).order_by('pk')
     277[<Publisher: Sams>, <Publisher: Morgan Kaufmann>, <Publisher: Expensive Publisher>]
     278
     279>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__isnull=True)
     280[]
     281
     282>>> p.delete()
     283
     284# Community tests
     285#
     286
     287#Thanks to Russell for the following set
     288#
     289
     290#Does Author X have any friends? (or better, how many friends does author X have)
     291>>> Author.objects.filter(pk=1).aggregate(Count('friends__id'))
     292{'friends__id__count': 2.0}
     293
     294#Give me a list of all Books with more than 1 authors
     295>>> Book.objects.all().annotate(num_authors=Count('authors__name')).filter(num_authors__ge=2).order_by('pk')
     296[<Book: The Definitive Guide to Django: Web Development Done Right>, <Book: Artificial Intelligence: A Modern Approach>]
     297
     298#Give me a list of all Authors that have no friends
     299>>> Author.objects.all().annotate(num_friends=Count('friends__id', distinct=True)).filter(num_friends=0).order_by('pk')
     300[<Author: Brad Dayley>]
     301
     302#Give me a list of all publishers that have published more than 1 books
     303>>> Publisher.objects.all().annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
     304[<Publisher: Apress >, <Publisher: Prentice Hall>]
     305
     306#Give me a list of all publishers that have published more than 1 books that cost less than 40
     307#>>> Publisher.objects.all().filter(book__price__lt=40).annotate(num_books=Count('book__id')).filter(num_books__gt=1)
     308[<Publisher: Apress >]
     309
     310#Give me a list of all Books that were written by X and one other author.
     311>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1)
     312[<Book: Artificial Intelligence: A Modern Approach>]
     313
     314#Give me the average price of all Books that were written by X and one other author.
     315#(Aggregate over objects discovered using membership of the m2m set)
     316
     317#Adding an existing author to another book to test it the right way
     318>>> a = Author.objects.get(name__contains='Norvig')
     319>>> b = Book.objects.get(name__contains='Done Right')
     320>>> b.authors.add(a)
     321>>> b.save()
     322
     323#This should do it
     324>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1).aggregate(Avg('price'))
     325{'price__avg': 56.39...}
     326>>> b.authors.remove(a)
     327
     328#
     329# --- Just one of the hard ones left ---
     330#
     331
     332#Give me a list of all Authors that have published a book with at least one other person
     333#(Filters over a count generated on a related object)
     334#
     335# Cheating: [a for a in Author.objects.all().annotate(num_coleagues=Count('book__authors__id'), num_books=Count('book__id', distinct=True)) if a.num_coleagues - a.num_books > 0]
     336# F-Syntax is required. Will be fixed after F objects are available
     337
     338
     339#Thanks to Karen for the following set
     340# Tests on fields with different names and spaces. (but they work =) )
     341
     342>>> Clues.objects.values('EntryID__Entry').annotate(Appearances=Count('EntryID'), Distinct_Clues=Count('Clue', distinct=True))
     343[]
     344
     345"""}
  • tests/regressiontests/aggregation_regress/fixtures/initial_data.json

     
     1[
     2 {
     3  "pk": 1,
     4  "model": "aggregation_regress.publisher",
     5  "fields": {
     6   "name": "Apress ",
     7   "num_awards": 3
     8  }
     9 },
     10 {
     11  "pk": 2,
     12  "model": "aggregation_regress.publisher",
     13  "fields": {
     14   "name": "Sams",
     15   "num_awards": 1
     16  }
     17 },
     18 {
     19  "pk": 3,
     20  "model": "aggregation_regress.publisher",
     21  "fields": {
     22   "name": "Prentice Hall",
     23   "num_awards": 7
     24  }
     25 },
     26 {
     27  "pk": 4,
     28  "model": "aggregation_regress.publisher",
     29  "fields": {
     30   "name": "Morgan Kaufmann",
     31   "num_awards": 9
     32  }
     33 },
     34 {
     35  "pk": 1,
     36  "model": "aggregation_regress.book",
     37  "fields": {
     38   "publisher": 1,
     39   "isbn": "159059725",
     40   "name": "The Definitive Guide to Django: Web Development Done Right",
     41   "price": 30.0,
     42   "authors": [
     43    1,
     44    2
     45   ],
     46   "pages": 447
     47  }
     48 },
     49 {
     50  "pk": 2,
     51  "model": "aggregation_regress.book",
     52  "fields": {
     53   "publisher": 2,
     54   "isbn": "067232959",
     55   "name": "Sams Teach Yourself Django in 24 Hours",
     56   "price": 23.09,
     57   "authors": [
     58    3
     59   ],
     60   "pages": 528
     61  }
     62 },
     63 {
     64  "pk": 3,
     65  "model": "aggregation_regress.book",
     66  "fields": {
     67   "publisher": 1,
     68   "isbn": "159059996",
     69   "name": "Practical Django Projects",
     70   "price": 29.69,
     71   "authors": [
     72    4
     73   ],
     74   "pages": 300
     75  }
     76 },
     77 {
     78  "pk": 4,
     79  "model": "aggregation_regress.book",
     80  "fields": {
     81   "publisher": 3,
     82   "isbn": "013235613",
     83   "name": "Python Web Development with Django",
     84   "price": 29.69,
     85   "authors": [
     86    5,
     87    6,
     88    7
     89   ],
     90   "pages": 350
     91  }
     92 },
     93 {
     94  "pk": 5,
     95  "model": "aggregation_regress.book",
     96  "fields": {
     97   "publisher": 3,
     98   "isbn": "013790395",
     99   "name": "Artificial Intelligence: A Modern Approach",
     100   "price": 82.8,
     101   "authors": [
     102    8,
     103    9
     104   ],
     105   "pages": 1132
     106  }
     107 },
     108 {
     109  "pk": 6,
     110  "model": "aggregation_regress.book",
     111  "fields": {
     112   "publisher": 4,
     113   "isbn": "155860191",
     114   "name": "Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp",
     115   "price": 75.0,
     116   "authors": [
     117    8
     118   ],
     119   "pages": 946
     120  }
     121 },
     122 {
     123  "pk": 1,
     124  "model": "aggregation_regress.store",
     125  "fields": {
     126   "books": [
     127    1,
     128    2,
     129    3,
     130    4,
     131    5,
     132    6
     133   ],
     134   "name": "Amazon.com"
     135  }
     136 },
     137 {
     138  "pk": 2,
     139  "model": "aggregation_regress.store",
     140  "fields": {
     141   "books": [
     142    1,
     143    3,
     144    5,
     145    6
     146   ],
     147   "name": "Books.com"
     148  }
     149 },
     150 {
     151  "pk": 3,
     152  "model": "aggregation_regress.store",
     153  "fields": {
     154   "books": [
     155    3,
     156    4,
     157    6
     158   ],
     159   "name": "Mamma and Pappa's Books"
     160  }
     161 },
     162 {
     163  "pk": 1,
     164  "model": "aggregation_regress.author",
     165  "fields": {
     166   "age": 34,
     167   "friends": [
     168    2,
     169    4
     170   ],
     171   "name": "Adrian Holovaty"
     172  }
     173 },
     174 {
     175  "pk": 2,
     176  "model": "aggregation_regress.author",
     177  "fields": {
     178   "age": 35,
     179   "friends": [
     180    1,
     181    7
     182   ],
     183   "name": "Jacob Kaplan-Moss"
     184  }
     185 },
     186 {
     187  "pk": 3,
     188  "model": "aggregation_regress.author",
     189  "fields": {
     190   "age": 45,
     191   "friends": [],
     192   "name": "Brad Dayley"
     193  }
     194 },
     195 {
     196  "pk": 4,
     197  "model": "aggregation_regress.author",
     198  "fields": {
     199   "age": 29,
     200   "friends": [
     201    1
     202   ],
     203   "name": "James Bennett"
     204  }
     205 },
     206 {
     207  "pk": 5,
     208  "model": "aggregation_regress.author",
     209  "fields": {
     210   "age": 37,
     211   "friends": [
     212    6,
     213    7
     214   ],
     215   "name": "Jeffrey Forcier "
     216  }
     217 },
     218 {
     219  "pk": 6,
     220  "model": "aggregation_regress.author",
     221  "fields": {
     222   "age": 29,
     223   "friends": [
     224    5,
     225    7
     226   ],
     227   "name": "Paul Bissex"
     228  }
     229 },
     230 {
     231  "pk": 7,
     232  "model": "aggregation_regress.author",
     233  "fields": {
     234   "age": 25,
     235   "friends": [
     236    2,
     237    5,
     238    6
     239   ],
     240   "name": "Wesley J. Chun"
     241  }
     242 },
     243 {
     244  "pk": 8,
     245  "model": "aggregation_regress.author",
     246  "fields": {
     247   "age": 57,
     248   "friends": [
     249    9
     250   ],
     251   "name": "Peter Norvig"
     252  }
     253 },
     254 {
     255  "pk": 9,
     256  "model": "aggregation_regress.author",
     257  "fields": {
     258   "age": 46,
     259   "friends": [
     260    8
     261   ],
     262   "name": "Stuart Russell"
     263  }
     264 }
     265]
  • tests/regressiontests/aggregation_regress/models.py

     
     1# coding: utf-8
     2from django.db import models
     3
     4class Author(models.Model):
     5   name = models.CharField(max_length=100)
     6   age = models.IntegerField()
     7   friends = models.ManyToManyField('self', blank=True)
     8
     9   def __unicode__(self):
     10      return self.name
     11
     12   class Admin:
     13      pass
     14
     15class Publisher(models.Model):
     16   name = models.CharField(max_length=300)
     17   num_awards = models.IntegerField()
     18   
     19   def __unicode__(self):
     20      return self.name
     21
     22   class Admin:
     23      pass
     24
     25class Book(models.Model):
     26   isbn = models.CharField(max_length=9)
     27   name = models.CharField(max_length=300)
     28   pages = models.IntegerField()
     29   price = models.FloatField()
     30   authors = models.ManyToManyField(Author)
     31   publisher = models.ForeignKey(Publisher)
     32   
     33   def __unicode__(self):
     34      return self.name
     35
     36   class Admin:
     37      pass
     38
     39class Store(models.Model):
     40   name = models.CharField(max_length=300)
     41   books = models.ManyToManyField(Book)
     42   
     43   def __unicode__(self):
     44      return self.name
     45
     46   class Admin:
     47      pass
     48
     49#Extra does not play well with values. Modify the tests if/when this is fixed.
     50__test__ = {'API_TESTS': """
     51>>> from django.core import management
     52>>> from django.db.models import get_app
     53
     54# Reset the database representation of this app.
     55# This will return the database to a clean initial state.
     56>>> management.call_command('flush', verbosity=0, interactive=False)
     57
     58>>> from django.db.aggregates import Avg, Sum, Count, Max, Min
     59
     60>>> Book.objects.all().aggregate(Sum('pages'), Avg('pages'))
     61{'pages__sum': 3703.0, 'pages__avg': 617.1...}
     62
     63>>> Book.objects.all().values().aggregate(Sum('pages'), Avg('pages'))
     64{'pages__sum': 3703.0, 'pages__avg': 617.1...}
     65
     66>>> Book.objects.all().extra(select={'price_per_page' : 'price / pages'}).aggregate(Sum('pages'))
     67{'pages__sum': 3703.0}
     68
     69>>> Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).get(pk=1).__dict__
     70{'mean_auth_age': 34.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'price_per_page': 0.067..., 'price': 30.0, 'id': 1, 'publisher_id': 1, 'pages': 447}
     71
     72>>> Book.objects.all().extra(select={'price_per_page' : 'price / pages'}).annotate(mean_auth_age=Avg('authors__age')).get(pk=1).__dict__
     73{'mean_auth_age': 34.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'price_per_page': 0.067..., 'price': 30.0, 'id': 1, 'publisher_id': 1, 'pages': 447}
     74
     75>>> Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).values().get(pk=1)
     76{'mean_auth_age': 34.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'price_per_page': 0.067..., 'price': 30.0, 'id': 1, 'publisher_id': 1.0, 'pages': 447}
     77
     78>>> Book.objects.all().annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).values('name').get(pk=1)
     79{'mean_auth_age': 34.5, 'name': u'The Definitive Guide to Django: Web Development Done Right'}
     80
     81>>> Book.objects.all().values().annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).get(pk=1)
     82{'mean_auth_age': 34.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'price_per_page': 0.067..., 'price': 30.0, 'id': 1, 'publisher_id': 1.0, 'pages': 447}
     83
     84>>> Book.objects.all().values('name').annotate(mean_auth_age=Avg('authors__age')).extra(select={'price_per_page' : 'price / pages'}).get(pk=1)
     85{'mean_auth_age': 34.5, 'name': u'The Definitive Guide to Django: Web Development Done Right'}
     86
     87#Check that all of the objects are getting counted (allow_nulls) and that values respects the amount of objects
     88>>> len(Author.objects.all().annotate(Avg('friends__age')).values())
     899
     90
     91#Check that consecutive calls to annotate dont break group by
     92>>> Book.objects.values('price').annotate(oldest=Max('authors__age')).order_by('oldest').annotate(Max('publisher__num_awards'))
     93[{'price': 30.0, 'oldest': 35.0, 'publisher__num_awards__max': 3.0}, {'price': 29.69..., 'oldest': 37.0, 'publisher__num_awards__max': 7.0}, {'price': 23.09, 'oldest': 45.0, 'publisher__num_awards__max': 1.0}, {'price': 75.0, 'oldest': 57.0, 'publisher__num_awards__max': 9.0}, {'price': 82.7..., 'oldest': 57.0, 'publisher__num_awards__max': 7.0}]
     94
     95#Checks fixed bug with multiple aggregate objects in the aggregate call
     96>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Max('price'), Sum('num_authors'))
     97{'num_authors__sum': 10.0, 'price__max': 82.7...}
     98
     99"""
     100}
  • tests/regressiontests/queries/models.py

     
    582582# An empty values() call includes all aliases, including those from an extra()
    583583>>> dicts = qs.values().order_by('id')
    584584>>> [sorted(d.items()) for d in dicts]
    585 [[('author_id', 2), ('good', 0), ('id', 1), ('rank', 2)], [('author_id', 3), ('good', 0), ('id', 2), ('rank', 1)], [('author_id', 1), ('good', 1), ('id', 3), ('rank', 3)]]
     585[[('author_id', 2.0), ('good', 0), ('id', 1), ('rank', 2)], [('author_id', 3.0), ('good', 0), ('id', 2), ('rank', 1)], [('author_id', 1.0), ('good', 1), ('id', 3), ('rank', 3)]]
    586586
    587587Bugs #2874, #3002
    588588>>> qs = Item.objects.select_related().order_by('note__note', 'name')
     
    953953>>> len([x[2] for x in q.alias_map.values() if x[2] == q.LOUTER and q.alias_refcount[x[1]]])
    9549541
    955955
    956 A check to ensure we don't break the internal query construction of GROUP BY
    957 and HAVING. These aren't supported in the public API, but the Query class knows
    958 about them and shouldn't do bad things.
    959 >>> qs = Tag.objects.values_list('parent_id', flat=True).order_by()
    960 >>> qs.query.group_by = ['parent_id']
    961 >>> qs.query.having = ['count(parent_id) > 1']
    962 >>> expected = [t3.parent_id, t4.parent_id]
    963 >>> expected.sort()
    964 >>> result = list(qs)
    965 >>> result.sort()
    966 >>> expected == result
    967 True
    968 
    969956"""}
    970957
    971958# In Python 2.3 and the Python 2.6 beta releases, exceptions raised in __len__
  • AUTHORS

     
    3030    AgarFu <heaven@croasanaso.sytes.net>
    3131    Dagur Páll Ammendrup <dagurp@gmail.com>
    3232    Collin Anderson <cmawebsite@gmail.com>
     33    Nicolas Lara <nicolaslara@gmail.com>
    3334    Jeff Anderson <jefferya@programmerq.net>
    3435    Andreas
    3536    andy@jadedplanet.net
  • docs/aggregation.txt

     
     1=============
     2 Aggregation
     3=============
     4
     5**New in Django development version**
     6
     7Aggregation works on top of an existing QuerySet allowing you to do
     8calculations over sets of objects at the database level.
     9
     10The calculations to be retrieved are expressed using Aggregate
     11objects. 
     12
     13Aggregate objects
     14=================
     15
     16Aggregate objects define the correspondence between the lookup being
     17done in ORM syntax and the query that is executed in the backend.
     18
     19All Aggregate objects take the field to be aggregated upon as a string
     20and an optional alias to represent the calculation in the result.
     21
     22Field representations are done in the same way as in field
     23lookups. For example::
     24
     25    Max(total_price='price')
     26
     27would represent the maximum price of the selected objects in the model
     28aggregated upon and it would be referred to as "total_price" while::
     29
     30    Avg(mean_age='friends__age')
     31
     32would represent the average of the related field for all the objects
     33related to the objects in the model aggregated upon, likewise referred
     34to as "mean_age".
     35
     36There are many possible ways to use aggregation so the actual results
     37of different lookups will become clearer in the documentation for
     38``aggregate()`` and ``annotate()``.
     39
     40If the alias is not present a default alias is defined according to
     41each Aggregate object. So::
     42
     43     Min('friend__height')
     44
     45would be referred to as "friend__height__min".
     46
     47For every aggregate object that spans multiple models, if the field of
     48the related model to be used in the aggregation is not specified, the
     49field defaults to the primary key.
     50
     51Aggregate objects are located at ``django.db.aggregates``. Every
     52aggregate object is a subclass of ``Aggregate``. An empty subclass of
     53aggregate called Func could be used like this::
     54
     55          Func('field')
     56
     57and have the following SQL equivalent::
     58
     59    SELECT FUNC(field) as field__func ...
     60
     61The following Aggregate subclasses are pre-defined:
     62
     63Max
     64---
     65
     66Calculates the maximum on the given field.
     67
     68Default alias: ``field__max``
     69
     70Min
     71---
     72
     73Calculates the minimum on the given field.
     74
     75Default alias: ``field__min``
     76
     77Avg
     78---
     79
     80Calculates the average on the given field.
     81
     82Default alias: ``field__avg``
     83
     84Sum
     85---
     86
     87Calculates the summation on the given field.
     88
     89Default alias: ``field__sum``
     90
     91Count
     92-----
     93
     94Counts the objects in which the field is not "Null". For counting
     95regardless of the field, please refer to `count()`_.
     96
     97Count takes an optional parameter: *distinct*.
     98
     99If *distinct* is True, repeated values of the field are counted only once.
     100
     101If distinct is True::
     102
     103   Count(field, distinct=True)
     104
     105has the SQL equivalent::
     106
     107    COUNT(DISTINCT field)
     108
     109otherwise it is::
     110
     111    COUNT(field)
     112
     113Default alias: ``field__count``
     114
     115.. _count(): ../db-api/#count
     116
     117Methods that do aggregation
     118===========================
     119
     120For this section we'll refer to the following models::
     121
     122    class Author(models.Model):                   
     123       name = models.CharField(max_length=100)   
     124       age = models.IntegerField()               
     125       friends = models.ManyToManyField('self', blank=True)
     126                                                 
     127    class Publisher(models.Model):               
     128       name = models.CharField(max_length=300)   
     129       num_awards = models.IntegerField()         
     130                                                 
     131    class Book(models.Model):                     
     132       isbn = models.CharField(max_length=9)     
     133       name = models.CharField(max_length=300)   
     134       pages = models.IntegerField()             
     135       price = models.FloatField()               
     136       authors = models.ManyToManyField(Author)   
     137       publisher = models.ForeignKey(Publisher)   
     138                                                 
     139    class Store(models.Model):                   
     140       name = models.CharField(max_length=300)   
     141       books = models.ManyToManyField(Book)
     142
     143
     144aggregate(args, kwargs)
     145-----------------------
     146
     147Returns a dictionary containing the calculations (aggregation) over
     148the current queryset.
     149
     150    >>> Book.objects.aggregate(Avg('price'), highest_price=Max('price'))
     151    {'price__avg': 45.045000000000002, 'highest_price': 82.799999999999997}
     152
     153You can also do aggregate lookups on related models.
     154
     155    >>> Author.objects.aggregate(Sum('book__price'))
     156    {'book__price__sum': 442.44999999999999}
     157
     158It is important to note that the previous query reads "The sum of
     159the price of every book for every author". So if a book has many
     160authors, its price will be added once for each of its authors. If
     161you are interested, instead, in "the sum of the price for
     162all books" you would need to do a query like this::
     163
     164    >>> Book.objects.aggregate(Sum('price'))
     165    {'price__sum': 270.26999999999998}
     166
     167.. note::
     168   
     169   It is important to note that aggregate() is a terminal
     170   clause. This means that it does *not* return a queryset and no
     171   other modifiers can be applied after it.
     172
     173annotate(args, kwargs)
     174----------------------
     175
     176Returns a QuerySet extended with the results of the calculations on
     177the given fields. So if you need to retrieve the "age for the oldest
     178author of each book" you could do:
     179
     180    >>> books = Book.objects.annotate(Max('authors__age'))
     181    >>> books[0].name
     182    u'Python Web Development With Django'
     183    >>> books[0].authors.all()
     184    [<Author: Jeffrey Forcier >, <Author: Paul Bissex>, <Author: Wesley J. Chun>]
     185    >>> books[0].authors__age__max
     186    37.0
     187
     188And the output would be the model object extended with the aggregation
     189information.
     190
     191grouping
     192~~~~~~~~
     193
     194Sometimes you want to annotate, not on the whole set of objects but on
     195those that share the same value for some fields. To do this, you
     196apply values() before annotating. For example, if you want to retrieve
     197the age of the oldest author for the books of the same price you could do::
     198
     199    >>> books = Book.objects.values('price').annotate(oldest=Max('authors__age'))
     200    >>> for book_group in books:
     201    ...    print 'price', book_group['price'], 'oldest', book_group['oldest']
     202    ...
     203    price 29.69 oldest 37.0
     204    price 75.0 oldest 57.0
     205    price 82.8 oldest 57.0
     206    price 23.09 oldest 45.0
     207    price 30.0 oldest 35.0
     208
     209Note that applying values() after annotate() does not have the same
     210effect. It reduces the output but no grouping is made:
     211
     212    >>> books = Book.objects.annotate(Max('authors__age')).values('price') #An entry for every Book
     213    >>> for i in books:
     214    ...    print 'price', i['price'], 'max', i['authors__age__max']
     215
     216    price 23.09 max 45.0
     217    price 29.69 max 37.0
     218    price 75.0 max 57.0
     219    price 82.8 max 57.0
     220    price 30.0 max 35.0
     221    price 29.69 max 29.0
     222
     223    >>> len(Book.objects.annotate(Max('authors__age')).values('price')) #An entry for every Book
     224    6
     225
     226    >>> len(Book.objects.values('price').annotate(Max('authors__age'))) #Books are grouped by price
     227    5
     228
     229grouped_objects
     230~~~~~~~~~~~~~~~
     231
     232Also, after doing an annotation, one might need to recover the
     233elements that were grouped to do the calculation. To do this, the
     234grouped_objects argument to annotate is provided. This argument, if
     235True, changes the output format so the result is a list of tuples
     236containing the values of the grouping and a queryset to retrieve the
     237objects that were grouped.
     238
     239This changes the output in a way that for each result there's a tuple
     240containing the result of the aggregation and a queryset to retrieve
     241the objects that were grouped
     242
     243    >>> books = Book.objects.values('price').annotate(oldest=Max('authors__age'), grouped_objects=True)
     244    >>> books[0]
     245    ({'price': 29.690000000000001, 'oldest': 37.0},
     246     [<Book: Practical Django Projects>, <Book: Python Web Development with Django>])
     247
     248.. note::
     249
     250   As with normal querysets, the queryset returned by ``grouped_objects`` is
     251   lazy and will not be executed until it is evaluated. So if the
     252   objects change before evaluating the queryset the aggregated result
     253   might not hold.
     254
     255
     256filtering
     257~~~~~~~~~
     258
     259Another thing you might need is to retrieve only certain objects based
     260on the result of a calculation. To do this the filtering syntax is
     261used on the alias of the annotation. See filter(link) for more
     262information on the lookups.
     263
     264There are four different types of filtering that you might be
     265interested in. Each of these has a different representation.
     266
     267    * Simple filtering on the annotations
     268
     269          An example of this is retrieving the "Publishers that have more than one book"
     270
     271            >>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
     272            [<Publisher: Apress >, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
     273
     274    * Simple filtering on the whole set
     275
     276          This is a normal filter, unrelated to aggregation. "Publishers that have books that cost less than 40"
     277
     278             >>> Publisher.objects.filter(book__price__lt=40).order_by('pk')
     279             [<Publisher: Apress >, <Publisher: Apress >, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
     280
     281    * Annotating on the whole set and filtering on annotations
     282
     283          "Publishers that have more than one book and (at least) a book that cost less than 40"
     284
     285             >>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1, book__price__lt=40).order_by('pk')
     286             [<Publisher: Apress >, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]
     287
     288    * Filtering and annotating on the whole set
     289
     290          "Publishers that have more than one book that costs less than 40"
     291
     292             >>> Publisher.objects.filter(book__price__lt=40).annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
     293             [<Publisher: Apress >]
     294
     295These types of filtering exist because the
     296filtering results vary depending on whether they are done in a join of
     297two models or the single model. When you need the filtering to be done
     298on the model and not the join of two models the filtering must be done
     299before calling the annotation. If, on the contrary, the filtering
     300should be done on the result of the joining it must be done after
     301annotating.
     302
     303Aggregating on annotated values
     304-------------------------------
     305
     306It is possible to apply ``aggregate()`` on the result of an annotation
     307that does not group objects. Doing this will generate a subquery for
     308the annotated objects and calculate the aggregation on top of it.
     309
     310This way, if you wanted to calculate the average number of authors per
     311book you could do::
     312
     313    >>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Avg('num_authors'))
     314    {'num_authors__avg': 1.66...}
     315
Back to Top