Ticket #1435: agg_funcs.diff

File agg_funcs.diff, 16.0 KB (added by Jacob, 18 years ago)
  • django/db/models/manager.py

     
    5151        # Returns a caching QuerySet.
    5252        return self.get_query_set()
    5353
    54     def count(self):
    55         return self.get_query_set().count()
     54    def count(self,fieldname="*"):
     55        """Returns the number of rows in the default case. Else returns the number of non-null entries in the column corresponding to the fieldname.
     56        """
     57        return self.get_query_set().count(fieldname)
    5658
    5759    def dates(self, *args, **kwargs):
    5860        return self.get_query_set().dates(*args, **kwargs)
     
    8789    def values(self, *args, **kwargs):
    8890        return self.get_query_set().values(*args, **kwargs)
    8991
     92    # Aggregate functions (column-oriented)
     93
     94    def get_aggregate(self,functype,column):
     95        return self.get_query_set().get_aggregate(functype,column)
     96
     97    def get_aggregates(self,functypes,column):
     98        return self.get_query_set().get_aggregates(functypes,column)
     99
     100    def sum(self,fieldname):
     101        return self.get_query_set().sum(fieldname)
     102
     103    def min(self,fieldname):
     104        return self.get_query_set().min(fieldname)
     105
     106    def max(self,fieldname):
     107        return self.get_query_set().max(fieldname)
     108
     109    def avg(self,fieldname):
     110        return self.get_query_set().avg(fieldname)
     111
     112    def stddev(self,fieldname):
     113        return self.get_query_set().stddev(fieldname)
     114
     115    def median(self,fieldname):
     116        return self.get_query_set().median(fieldname)
     117
    90118class ManagerDescriptor(object):
    91119    # This class ensures managers aren't accessible via model instances.
    92120    # For example, Poll.objects works, but poll_obj.objects raises AttributeError.
  • django/db/models/query.py

     
    11from django.db import backend, connection
    2 from django.db.models.fields import DateField, FieldDoesNotExist
     2from django.db.models.fields import DateField, IntegerField, FloatField, FieldDoesNotExist
    33from django.db.models import signals
    44from django.dispatch import dispatcher
    55from django.utils.datastructures import SortedDict
     
    121121        combined._filters = self._filters | other._filters
    122122        return combined
    123123
     124    ##############################################
     125    # HELPER METHODS THAT EXAMINE FIELD METADATA #
     126    ##############################################
     127
     128    def is_number(self, fieldname):
     129        "Returns flag, True for integer. False for float. Non-float and non-integer raises either a FieldDoesNotExist or TypeError exception."
     130        field = self.model._meta.get_field(fieldname)
     131        # Let the FieldDoesNotExist exception propagate
     132        if isinstance(field, IntegerField):
     133            return True
     134        if isinstance(field, FloatField):
     135            return False
     136        raise TypeError, "Field %s for Model %s is not an IntegerField or FloatField" % (fieldname, self.model._meta.object_name)
     137
     138    def is_number_or_date(self, fieldname):
     139        "Returns 0 for int; 1 for float; 2 for date. Raises either a FieldDoesNotExist or TypeError exception if not an Integer, Float or Date."
     140        field = self.model._meta.get_field(fieldname)
     141        # Let the FieldDoesNotExist exception propagate
     142        if isinstance(field, IntegerField):
     143            return 0
     144        if isinstance(field, FloatField):
     145            return 1
     146        if isinstance(field, DateField):
     147            return 2
     148        raise TypeError, "Field %s for Model %s is not an IntegerField, FloatField or DateField" % (fieldname, self.model._meta.object_name)
     149
    124150    ####################################
    125151    # METHODS THAT DO DATABASE QUERIES #
    126152    ####################################
     
    149175                    setattr(obj, k[0], row[index_end+i])
    150176                yield obj
    151177
    152     def count(self):
    153         "Performs a SELECT COUNT() and returns the number of records as an integer."
     178    def count(self,fieldname="*"):
     179        "Performs a SELECT COUNT(column) and returns the number of records as an integer."
    154180        counter = self._clone()
    155181        counter._order_by = ()
    156182        counter._offset = None
    157183        counter._limit = None
    158184        counter._select_related = False
    159185        select, sql, params = counter._get_sql_clause()
     186        if fieldname == '*':
     187            column = '*'
     188        else:
     189            column = self.model._meta.get_field(fieldname).column
    160190        cursor = connection.cursor()
    161         cursor.execute("SELECT COUNT(*)" + sql, params)
     191        cursor.execute("SELECT COUNT(%s)" % (column) + sql, params)
    162192        return cursor.fetchone()[0]
    163193
     194    def get_aggregate(self,type,column):
     195        "Performs the specified aggregate function on the named column."
     196        agg = self._clone()
     197        agg._order_by = ()
     198        agg._offset = None
     199        agg._limit = None
     200        agg._select_related = False
     201        select, sql, params = agg._get_sql_clause()
     202        cursor = connection.cursor()
     203        sel = "SELECT %s(%s)" % (type, column)
     204        cursor.execute(sel + sql, params)
     205        return cursor.fetchone()[0]
     206 
     207    def get_aggregates(self,types,column):
     208        "Performs the specified aggregate functions on the named column."
     209        agg = self._clone()
     210        agg._order_by = ()
     211        agg._offset = None
     212        agg._limit = None
     213        agg._select_related = False
     214        select, sql, params = agg._get_sql_clause()
     215        cursor = connection.cursor()
     216        sel = []
     217        sel.append( "SELECT" )
     218        for type in types:
     219            sel.append ( "%s(%s)," % (type, column))
     220        select = " ".join(sel)[:-1]
     221        cursor.execute(select + sql, params)
     222        return cursor.fetchone()
     223 
     224    def sum(self, fieldname):
     225        "Performs a SELECT SUM() on the specified column."
     226        isInt = self.is_number(fieldname)
     227        column = self.model._meta.get_field(fieldname).column
     228        result = self.get_aggregate("SUM",column)
     229        if isInt:
     230            return int(result)
     231        return result
     232
     233    def avg(self, fieldname):
     234        "Performs a SELECT AVG() on the specified column."
     235        self.is_number(fieldname)
     236        column = self.model._meta.get_field(fieldname).column
     237        return self.get_aggregate("AVG",column)
     238
     239    def stddev(self, fieldname):
     240        "Performs a SELECT STDDEV() on the specified column."
     241        self.is_number(fieldname)
     242        column = self.model._meta.get_field(fieldname).column
     243        return self.get_aggregate("STDDEV",column)
     244
     245    def min(self, fieldname):
     246        "Performs a SELECT MIN() on the specified column."
     247        self.is_number_or_date(fieldname)
     248        column = self.model._meta.get_field(fieldname).column
     249        return self.get_aggregate("MIN",column)
     250
     251    def max(self, fieldname):
     252        "Performs a SELECT MAX() on the specified column."
     253        self.is_number_or_date(fieldname)
     254        column = self.model._meta.get_field(fieldname).column
     255        return self.get_aggregate("MAX",column)
     256
     257    def median(self, fieldname):
     258        "Returns the median value for the specified column."
     259        coltype = self.is_number_or_date(fieldname)
     260        column = self.model._meta.get_field(fieldname).column
     261        fetcher = self._clone()
     262        fetcher._order_by = (column,)
     263        fetcher._offset = None
     264        fetcher._limit = None
     265        fetcher._select_related = False
     266        select, sql, params = fetcher._get_sql_clause()
     267        sel = "SELECT %s" % (column)
     268        cursor = connection.cursor()
     269        cursor.execute(sel + sql, params)
     270        rows = cursor.fetchall()
     271        midvalue = len(rows) / 2
     272        if coltype == 2:
     273            # returning a date
     274            return str(rows[midvalue][0])
     275        else:
     276            return rows[midvalue][0]
     277
    164278    def get(self, *args, **kwargs):
    165279        "Performs the SELECT and returns a single object matching the given keyword arguments."
    166280        clone = self.filter(*args, **kwargs)
  • tests/modeltests/agg_funcs/models.py

     
     1"""
     2XXX. Aggregate Functions
     3
     4Aggregate functions are column-oriented functions like sum(), min(),
     5max(), avg() and so forth.
     6
     7"""
     8
     9from django.db import models
     10
     11class Article(models.Model):
     12    headline = models.CharField(maxlength=100,null=True)
     13    pub_date = models.DateTimeField()
     14    pull_date = models.DateTimeField(null=True)
     15    wordcount = models.IntegerField()
     16    fee = models.FloatField(decimal_places=2,max_digits=10)
     17    class Meta:
     18        ordering = ('-pub_date', 'headline')
     19
     20    def __repr__(self):
     21        return self.headline
     22
     23API_TESTS = """
     24# Create a couple of Articles.
     25>>> from datetime import datetime
     26>>> a1 = Article(headline='Article 1', pub_date=datetime(2005, 7, 26), wordcount=25, fee=25.0)
     27>>> a1.save()
     28>>> a2 = Article(headline='Article 2', pub_date=datetime(2005, 7, 27), wordcount=75, fee=75.0)
     29>>> a2.save()
     30>>> a3 = Article(headline='Article 3', pub_date=datetime(2005, 7, 28), wordcount=55, fee=110.0)
     31>>> a3.save()
     32>>> a4 = Article(headline='Article 4', pub_date=datetime(2005, 7, 24), pull_date=datetime(2005, 8, 1), wordcount=125, fee=250.0)
     33>>> a4.save()
     34>>> a5 = Article(headline='Article 5', pub_date=datetime(2005, 7, 25), pull_date=datetime(2005, 8, 1), wordcount=100, fee=40.0)
     35>>> a5.save()
     36
     37# Test the aggregate functions
     38>>> Article.objects.count()
     395
     40
     41>>> Article.objects.count('pull_date')
     422
     43
     44>>> Article.objects.sum('fee')
     45500.0
     46
     47>>> Article.objects.sum('wordcount')
     48380
     49
     50>>> Article.objects.sum('headline')
     51Traceback (most recent call last):
     52  ...
     53TypeError: Field headline for Model Article is not an IntegerField or FloatField
     54
     55>>> Article.objects.sum('bar')
     56Traceback (most recent call last):
     57  ...
     58FieldDoesNotExist: name=bar
     59
     60>>> Article.objects.avg('fee')
     61100.0
     62
     63>>> Article.objects.max('wordcount')
     64125
     65
     66>>> Article.objects.min('wordcount')
     6725
     68
     69>>> Article.objects.median('wordcount')
     7075
     71
     72>>> Article.objects.median('fee')
     7375.0
     74
     75>>> Article.objects.get_aggregates(["SUM","MIN","MAX","AVG"],'fee')
     76[500.0, 25.0, 250.0, 100.0]
     77
     78>>> Article.objects.get_aggregate("AVG",'wordcount*fee')
     799510.0
     80
     81>>> Article.objects.get_aggregate("SUM",'wordcount+fee')
     82880.0
     83
     84>>> Article.objects.min('pub_date')
     85'2005-07-24 00:00:00'
     86
     87>>> Article.objects.max('pub_date')
     88'2005-07-28 00:00:00'
     89
     90>>> Article.objects.median('pub_date')
     91'2005-07-26 00:00:00'
     92
     93>>> Article.objects.filter(fee__gt=100.0).avg('wordcount')
     9490.0
     95"""
     96
     97from django.conf import settings
     98if settings.DATABASE_ENGINE != "sqlite3":
     99    API_TESTS += """
     100>>> Article.objects.stddev('wordcount')
     101(The expected value is not yet known. Replace me!)
     102"""
     103
  • docs/db-api.txt

     
    442442    >>> people.get_values(fields=['first_name'], distinct=True)
    443443    [{'first_name': 'Adrian'}, {'first_name': 'Jacob'}, {'first_name': 'Simon'}]
    444444
     445Aggregate Functions
     446===================
     447
     448Aggregate functions perform calculations on columns. Typically
     449they return a single value. They fall into two groups: high-level
     450and low-level.
     451
     452High Level Functions
     453--------------------
     454
     455The high-level functions are sum(), min(), max(), avg(), stddev()
     456and median(). Each takes a fieldname as its argument. The type of
     457the field is checked, because each high-level function accepts
     458only certain field types.
     459
     460sum(fieldname)
     461---------------
     462
     463Returns the sum of the named field. The field must be an
     464IntegerField or a FloatField. The returned value corresponds
     465with the type of the column.
     466
     467min(fieldname), max(fieldname)
     468--------------------------------
     469
     470Returns the minimum or maximum value of the named field. The field
     471must be an IntegerField, FloatField or DateField. The returned value
     472corresponds with the type of the field. (This is a string
     473representation if the field is a DateField.)
     474
     475avg(fieldname)
     476---------------
     477
     478Returns the average of the named field. The field must be an
     479IntegerField or a FloatField. The returned value is a Float.
     480
     481stddev(fieldname)
     482------------------
     483
     484Returns the standard deviation of the named field. The field must be an
     485IntegerField or a FloatField. The returned value is a Float.
     486(Not supported on sqlite3. You get an OperationalError exception.)
     487
     488median(fieldname)
     489------------------
     490
     491Returns the median value of the named field. The field
     492must be an IntegerField, FloatField or DateField. The returned
     493value corresponds with the type of the field. (This is a string
     494representation if the column is a DateField.) Unlike the other
     495functions in this group, this function does not use the DB
     496supplied capabilities. It fetches all of the values of the field
     497ordered by that field and returns the middle value. (If there
     498is an even number of values, the second of the two middle
     499values is returned.)
     500
     501Low Level Functions
     502-------------------
     503
     504There are two low level functions: get_aggregate() and
     505get_aggregates(). They do minimal checking and allow for
     506powerful queries that potentially return multiple values
     507and/or combine multiple columns arithmetically.
     508
     509The low_level functions take columnnames instead of fieldnames.
     510You must do your own conversion from fieldname to columnname
     511if you are taking advantage of the fieldname mapping. (By
     512default fieldnames and columnnames match each other and so
     513most users will not have to worry about this distinction.)
     514
     515get_aggregate(type,columnname)
     516------------------------------
     517
     518This function supplies direct support for all database-supplied
     519aggregate functions. The type parameter is the name of an aggregate
     520function such as 'SUM', 'VARIANCE' or so forth, limited only by
     521what set of functions your particular database supports. The return
     522value uses whatever type your database canonically returns. (Most
     523databases return the same type as the named column, although this
     524is not the case for some functions such as "avg" or "stddev" which
     525always return a Float. Also note that sqlite3 always returns a Float
     526for all aggregate functions.)
     527
     528Note that the columnname is not explicitly checked for type and
     529so it is possible to combine columns arithmetically (with care!)
     530as follows:
     531
     532Inventory.objects.get_aggregate('AVG','quantity*price')
     533
     534This returns the average value of the 'quantity' column multiplied
     535by the 'price' column.
     536
     537Meals.objects.get_aggregate('MAX','price+tax+tip')
     538
     539This returns the highest priced meal which is calculated by the
     540database by adding the 'price', the 'tax' and the 'tip' columns.
     541
     542(As a repeat warning: Don't forget to get the columnname from your
     543fieldname if you are using fieldname mapping.)
     544
     545get_aggregates(types,columnname)
     546--------------------------------
     547
     548This function allows a single SQL operation to perform multiple
     549aggregate functions. The types argument is an iterable of
     550aggregate function names. The columnname is handled in the same
     551manner as with the get_aggregate() function. For example:
     552
     553Inventory.objects.get_aggregates(['AVG','MIN','MAX'],'quantity')
     554
     555The results are returned as a single row of values, one per requested function, in the order the functions were given.
     556
     557Usage
     558-----
     559
     560Typical use targets all of the rows in the targeted table.
     561For example:
     562
     563Articles.objects.sum('wordcount')
     564
     565However it is possible to combine the aggregate functions with
     566judicious filtering. For example:
     567
     568Poll.objects.filter(question__contains='football').min('pub_date')
     569
     570Exceptions
     571----------
     572
     573The most common exceptions encountered when using aggregate functions are:
     574
     575FieldDoesNotExist - the named field is not found on the model.
     576
     577TypeError - the named field uses an unsupported type.
     578
     579OperationalError - the functype is not supported by the database.
     580
     581
    445582Other lookup options
    446583====================
    447584
Back to Top