Code

Ticket #1435: agg_funcs.diff

File agg_funcs.diff, 16.0 KB (added by jacob, 8 years ago)
Line 
1Index: django/db/models/manager.py
2===================================================================
3--- django/db/models/manager.py (revision 2409)
4+++ django/db/models/manager.py (working copy)
5@@ -51,8 +51,10 @@
6         # Returns a caching QuerySet.
7         return self.get_query_set()
8 
9-    def count(self):
10-        return self.get_query_set().count()
11+    def count(self,fieldname="*"):
12+        """Returns the number of rows in the default case. Else returns the number of non-null entries in the column corresponding to the fieldname.
13+        """
14+        return self.get_query_set().count(fieldname)
15 
16     def dates(self, *args, **kwargs):
17         return self.get_query_set().dates(*args, **kwargs)
18@@ -87,6 +89,32 @@
19     def values(self, *args, **kwargs):
20         return self.get_query_set().values(*args, **kwargs)
21 
22+    # Aggregate functions (column-oriented)
23+
24+    def get_aggregate(self,functype,column):
25+        return self.get_query_set().get_aggregate(functype,column)
26+
27+    def get_aggregates(self,functypes,column):
28+        return self.get_query_set().get_aggregates(functypes,column)
29+
30+    def sum(self,fieldname):
31+        return self.get_query_set().sum(fieldname)
32+
33+    def min(self,fieldname):
34+        return self.get_query_set().min(fieldname)
35+
36+    def max(self,fieldname):
37+        return self.get_query_set().max(fieldname)
38+
39+    def avg(self,fieldname):
40+        return self.get_query_set().avg(fieldname)
41+
42+    def stddev(self,fieldname):
43+        return self.get_query_set().stddev(fieldname)
44+
45+    def median(self,fieldname):
46+        return self.get_query_set().median(fieldname)
47+
48 class ManagerDescriptor(object):
49     # This class ensures managers aren't accessible via model instances.
50     # For example, Poll.objects works, but poll_obj.objects raises AttributeError.
51Index: django/db/models/query.py
52===================================================================
53--- django/db/models/query.py   (revision 2409)
54+++ django/db/models/query.py   (working copy)
55@@ -1,5 +1,5 @@
56 from django.db import backend, connection
57-from django.db.models.fields import DateField, FieldDoesNotExist
58+from django.db.models.fields import DateField, IntegerField, FloatField, FieldDoesNotExist
59 from django.db.models import signals
60 from django.dispatch import dispatcher
61 from django.utils.datastructures import SortedDict
62@@ -121,6 +121,32 @@
63         combined._filters = self._filters | other._filters
64         return combined
65 
66+    ##############################################
67+    # HELPER METHODS THAT EXAMINE FIELD METADATA #
68+    ##############################################
69+
70+    def is_number(self, fieldname):
71+        "Returns flag, True for integer. False for float. Non-float and non-integer raises either a FieldDoesNotExist or TypeError exception."
72+        field = self.model._meta.get_field(fieldname)
73+        # Let the FieldDoesNotExist exception propogate
74+       if isinstance(field, IntegerField):
75+            return True
76+        if isinstance(field, FloatField):
77+            return False
78+        raise TypeError, "Field %s for Model %s is not an IntegerField or FloatField" % (fieldname, self.model._meta.object_name)
79+
80+    def is_number_or_date(self, fieldname):
81+        "Returns 0 for int; 1 for float; 2 for date. Raises either a FieldDoesNotExist or TypeError exception if not an Integer, Float or Date."
82+        field = self.model._meta.get_field(fieldname)
83+        # Let the FieldDoesNotExist exception propogate
84+        if isinstance(field, IntegerField):
85+            return 0
86+        if isinstance(field, FloatField):
87+            return 1
88+        if isinstance(field, DateField):
89+            return 2
90+        raise TypeError, "Field %s for Model %s is not an IntegerField, FloatField or DateField" % (fieldname, self.model._meta.object_name)
91+
92     ####################################
93     # METHODS THAT DO DATABASE QUERIES #
94     ####################################
95@@ -149,18 +175,106 @@
96                     setattr(obj, k[0], row[index_end+i])
97                 yield obj
98 
99-    def count(self):
100-        "Performs a SELECT COUNT() and returns the number of records as an integer."
101+    def count(self,fieldname="*"):
102+        "Performs a SELECT COUNT(column) and returns the number of records as an integer."
103         counter = self._clone()
104         counter._order_by = ()
105         counter._offset = None
106         counter._limit = None
107         counter._select_related = False
108         select, sql, params = counter._get_sql_clause()
109+        if fieldname == '*':
110+            column = '*'
111+        else:
112+            column = self.model._meta.get_field(fieldname).column
113         cursor = connection.cursor()
114-        cursor.execute("SELECT COUNT(*)" + sql, params)
115+        cursor.execute("SELECT COUNT(%s)" % (column) + sql, params)
116         return cursor.fetchone()[0]
117 
118+    def get_aggregate(self,type,column):
119+        "Performs the specified aggregate function on the named column."
120+        agg = self._clone()
121+        agg._order_by = ()
122+        agg._offset = None
123+        agg._limit = None
124+        agg._select_related = False
125+        select, sql, params = agg._get_sql_clause()
126+        cursor = connection.cursor()
127+        sel = "SELECT %s(%s)" % (type, column)
128+        cursor.execute(sel + sql, params)
129+        return cursor.fetchone()[0]
130
131+    def get_aggregates(self,types,column):
132+        "Performs the specified aggregate functions on the named column."
133+        agg = self._clone()
134+        agg._order_by = ()
135+        agg._offset = None
136+        agg._limit = None
137+        agg._select_related = False
138+        select, sql, params = agg._get_sql_clause()
139+        cursor = connection.cursor()
140+        sel = []
141+        sel.append( "SELECT" )
142+        for type in types:
143+            sel.append ( "%s(%s)," % (type, column))
144+        select = " ".join(sel)[:-1]
145+        cursor.execute(select + sql, params)
146+        return cursor.fetchone()
147
148+    def sum(self, fieldname):
149+        "Performs a SELECT SUM() on the specified column."
150+        isInt = self.is_number(fieldname)
151+        column = self.model._meta.get_field(fieldname).column
152+        result = self.get_aggregate("SUM",column)
153+        if isInt:
154+            return int(result)
155+        return result
156+
157+    def avg(self, fieldname):
158+        "Performs a SELECT AVG() on the specified column."
159+        self.is_number(fieldname)
160+        column = self.model._meta.get_field(fieldname).column
161+        return self.get_aggregate("AVG",column)
162+
163+    def stddev(self, fieldname):
164+        "Performs a SELECT STDDEV() on the specified column."
165+        self.is_number(fieldname)
166+        column = self.model._meta.get_field(fieldname).column
167+        return self.get_aggregate("STDDEV",column)
168+
169+    def min(self, fieldname):
170+        "Performs a SELECT MIN() on the specified column."
171+        self.is_number_or_date(fieldname)
172+        column = self.model._meta.get_field(fieldname).column
173+        return self.get_aggregate("MIN",column)
174+
175+    def max(self, fieldname):
176+        "Performs a SELECT MAX() on the specified column."
177+        self.is_number_or_date(fieldname)
178+        column = self.model._meta.get_field(fieldname).column
179+        return self.get_aggregate("MAX",column)
180+
181+    def median(self, fieldname):
182+        "Returns the median value for the specified column."
183+        coltype = self.is_number_or_date(fieldname)
184+        column = self.model._meta.get_field(fieldname).column
185+        fetcher = self._clone()
186+        fetcher._order_by = (column,)
187+        fetcher._offset = None
188+        fetcher._limit = None
189+        fetcher._select_related = False
190+        select, sql, params = fetcher._get_sql_clause()
191+        sel = "SELECT %s" % (column)
192+        cursor = connection.cursor()
193+        cursor.execute(sel + sql, params)
194+        rows = cursor.fetchall()
195+        midvalue = len(rows) / 2
196+        if coltype == 2:
197+            # returning a date
198+            return str(rows[midvalue][0])
199+        else:
200+            return rows[midvalue][0]
201+
202     def get(self, *args, **kwargs):
203         "Performs the SELECT and returns a single object matching the given keyword arguments."
204         clone = self.filter(*args, **kwargs)
205Index: tests/modeltests/agg_funcs/__init__.py
206===================================================================
207Index: tests/modeltests/agg_funcs/models.py
208===================================================================
209--- tests/modeltests/agg_funcs/models.py        (revision 0)
210+++ tests/modeltests/agg_funcs/models.py        (revision 0)
211@@ -0,0 +1,103 @@
212+"""
213+XXX. Aggregate Functions
214+
215+Aggregate functions are column-oriented functions like sum(), min()
216+max(), avg() and so forth.
217+
218+"""
219+
220+from django.db import models
221+
222+class Article(models.Model):
223+    headline = models.CharField(maxlength=100,null=True)
224+    pub_date = models.DateTimeField()
225+    pull_date = models.DateTimeField(null=True)
226+    wordcount = models.IntegerField()
227+    fee = models.FloatField(decimal_places=2,max_digits=10)
228+    class Meta:
229+        ordering = ('-pub_date', 'headline')
230+
231+    def __repr__(self):
232+        return self.headline
233+
234+API_TESTS = """
235+# Create a couple of Articles.
236+>>> from datetime import datetime
237+>>> a1 = Article(headline='Article 1', pub_date=datetime(2005, 7, 26), wordcount=25, fee=25.0)
238+>>> a1.save()
239+>>> a2 = Article(headline='Article 2', pub_date=datetime(2005, 7, 27), wordcount=75, fee=75.0)
240+>>> a2.save()
241+>>> a3 = Article(headline='Article 3', pub_date=datetime(2005, 7, 28), wordcount=55, fee=110.0)
242+>>> a3.save()
243+>>> a4 = Article(headline='Article 4', pub_date=datetime(2005, 7, 24), pull_date=datetime(2005, 8, 1), wordcount=125, fee=250.0)
244+>>> a4.save()
245+>>> a5 = Article(headline='Article 5', pub_date=datetime(2005, 7, 25), pull_date=datetime(2005, 8, 1), wordcount=100, fee=40.0)
246+>>> a5.save()
247+
248+# Test the aggregate functions
249+>>> Article.objects.count()
250+5
251+
252+>>> Article.objects.count('pull_date')
253+2
254+
255+>>> Article.objects.sum('fee')
256+500.0
257+
258+>>> Article.objects.sum('wordcount')
259+380
260+
261+>>> Article.objects.sum('headline')
262+Traceback (most recent call last):
263+  ...
264+TypeError: Field headline for Model Article is not an IntegerField or FloatField
265+
266+>>> Article.objects.sum('bar')
267+Traceback (most recent call last):
268+  ...
269+FieldDoesNotExist: name=bar
270+
271+>>> Article.objects.avg('fee')
272+100.0
273+
274+>>> Article.objects.max('wordcount')
275+125
276+
277+>>> Article.objects.min('wordcount')
278+25
279+
280+>>> Article.objects.median('wordcount')
281+75
282+
283+>>> Article.objects.median('fee')
284+75.0
285+
286+>>> Article.objects.get_aggregates(["SUM","MIN","MAX","AVG"],'fee')
287+[500.0, 25.0, 250.0, 100.0]
288+
289+>>> Article.objects.get_aggregate("AVG",'wordcount*fee')
290+9510.0
291+
292+>>> Article.objects.get_aggregate("SUM",'wordcount+fee')
293+880.0
294+
295+>>> Article.objects.min('pub_date')
296+'2005-07-24 00:00:00'
297+
298+>>> Article.objects.max('pub_date')
299+'2005-07-28 00:00:00'
300+
301+>>> Article.objects.median('pub_date')
302+'2005-07-26 00:00:00'
303+
304+>>> Article.objects.filter(fee__gt=100.0).avg('wordcount')
305+90.0
306+"""
307+
308+from django.conf import settings
309+if settings.DATABASE_ENGINE != "sqlite3":
310+    API_TESTS += """
311+>>> Article.objects.stddev('wordcount')
312+(The expected value is not yet known. Replace me!)
313+"""
314+
315Index: docs/db-api.txt
316===================================================================
317--- docs/db-api.txt     (revision 2409)
318+++ docs/db-api.txt     (working copy)
319@@ -442,6 +442,143 @@
320     >>> people.get_values(fields=['first_name'], distinct=True)
321     [{'first_name': 'Adrian'}, {'first_name': 'Jacob'}, {'first_name': 'Simon'}]
322 
323+Aggregate Functions
324+===================
325+
326+Aggregate functions perform calculations on columns. Typically
327+they return a single value. They are in two groups: high_level
328+and low_level.
329+
330+High Level Functions
331+--------------------
332+
333+The high_level functions are sum(), min(), max(), avg(), stddev()
334+and median(). Each takes a fieldname as an argument. The type of
335+the field is checked for correctness as only certain datatypes are
336+allowed for each of the high level functions.
337+
338+sum(fieldname)
339+---------------
340+
341+Returns the sum of the named field. The field must be an
342+IntegerField or a FloatField. The returned value corresponds
343+with the type of the column.
344+
345+min(fieldname), max(fieldname)
346+--------------------------------
347+
348+Returns the minimum or maximum value of the named field. The field
349+must be an IntegerField, FloatField or DateField. The returned value
350+corresponds with the type of the field. (This is a string
351+representation if the field is a DateField.)
352+
353+avg(fieldname)
354+---------------
355+
356+Returns the average of the named field. The field must be an
357+IntegerField or a FloatField. The returned value is a Float.
358+
359+stddev(fieldname)
360+------------------
361+
362+Returns the standard deviation of the named field. The field must be an
363+IntegerField or a FloatField. The returned value is a Float.
364+(Not supported on sqlite3. You get an OperationError exception.)
365+
366+median(fieldname)
367+------------------
368+
369+Returns the median value of the named field. The field
370+must be an IntegerField, FloatField or DateField. The returned
371+value corresponds with the type of the field. (This is a string
372+representation if the column is a DateField.) Unlike the other
373+functions in this group, this function does not use the DB
374+supplied capabilities. It fetches all of the values of the field
375+ordered by that field and returns the middle value. (If there
376+are an even number of values, the second of the two middle
377+values is returned.)
378+
379+Low Level Functions
380+-------------------
381+
382+There are two low level functions: get_aggregate() and
383+get_aggregates(). They do minimal checking and allow for
384+powerful queries that potentially return multiple values
385+and/or combine multiple column arithmetically.
386+
387+The low_level functions take columnnames instead of fieldnames.
388+You must do your own conversion from fieldname to columnname
389+if you are taking advantage of the fieldname mapping. (By
390+default fieldnames and columnnames match each other and so
391+most users will not have to worry about this distinction.)
392+
393+get_aggregate(type,columnname)
394+------------------------------
395+
396+This function supplies direct support for all database-supplied
397+aggregate functions. The type parameter is the name of an aggregate
398+function such as 'SUM', 'VARIANCE' or so forth limited only by
399+what set of functions your particular database supports. The return
400+value uses whatever type your database connonically returns. (Most
401+databases return the same type as the named column, although this
402+is not the case for some functions such as "avg" or "stddev" which
403+always returns a Float. Also note that sqlite3 always returns a Float
404+for all aggregate function.)
405+
406+Note that the columnname is not explicitly checked for type and
407+so it is possible to combine columns arithmetically (with care!)
408+as follows:
409+
410+Inventory.objects.get_aggregate('AVG','quantity*price')
411+
412+This returns the average value of the 'quantity' column multiplied
413+by the 'price' column.
414+
415+Meals.objects.get_aggregate('MAX','price+tax+tip')
416+
417+This returns the highest priced meal which is calculated by the
418+database by adding the 'price', the 'tax' and the 'tip' columns.
419+
420+(As a repeat warning: Don't forget to get the columnname from your
421+fieldname if you are using fieldname mapping.)
422+
423+get_aggregates(types,columnname)
424+--------------------------------
425+
426+This function allows a single SQL operation to perform multiple
427+aggregate functions. The types field is an iterable list of
428+aggregate function names. The columnname is handled in the same
429+manner as with the get_aggregate() function. For example:
430+
431+Inventory.objects.get_aggregates(['AVG','MIN','MAX'],'quantity')
432+
433+The results are returned in an array.
434+
435+Usage
436+-----
437+
438+Typical use targets all of the rows in the targeted table.
439+For example:
440+
441+Articles.objects.sum('wordcount')
442+
443+However it is possible to combine the aggregate functions with
444+judicious filtering. For example:
445+
446+Poll.objects.filter(question__contains='football').min('pub_date')
447+
448+Exceptions
449+----------
450+
451+The most common exceptions encountered when using aggregate functions are:
452+
453+FieldDoesNotExist - the columnname is not found.
454+
455+TypeError - the named column uses an unsupported type.
456+
457+OperationError - the functype is not supported by the database.
458+
459+
460 Other lookup options
461 ====================
462