Django

Code

root/django/trunk/django/db/models/query.py

Revision 9202, 30.9 kB (checked in by mtredinnick, 2 months ago)

Fixed #6748 -- When printing the repr() of querysets, don't load or display
more than 20 objects.

This means that accidentally executing HugeStoryArchive.objects.all() at the
interactive prompt (or in the debug template) won't try to load all 4,233,010
stories into memory and print them out. That would previously cause resource
starvation and other "interesting" crashes.

If you really, really want the previous behaviour (e.g. in a doctest that
prints more than 20 items), display "list(qs)" instead of just "qs".

  • Property svn:eol-style set to native
Line 
1 try:
2     set
3 except NameError:
4     from sets import Set as set     # Python 2.3 fallback
5
6 from django.db import connection, transaction, IntegrityError
7 from django.db.models.fields import DateField
8 from django.db.models.query_utils import Q, select_related_descend
9 from django.db.models import signals, sql
10 from django.utils.datastructures import SortedDict
11
12
# Used to control how many objects are worked with at once in some cases (e.g.
# when deleting objects). QuerySet.delete() collects at most this many
# objects per pass.
CHUNK_SIZE = 100
# NOTE(review): not referenced in the visible part of this module; presumably
# the batch size used when filling a QuerySet's result cache -- confirm.
ITER_CHUNK_SIZE = CHUNK_SIZE

# The maximum number of items to display in a QuerySet.__repr__
REPR_OUTPUT_SIZE = 20

# Pull into this namespace for backwards compatibility.
EmptyResultSet = sql.EmptyResultSet
23
24
class CyclicDependency(Exception):
    """
    Raised when a collection of objects cannot be put into a processing order
    because their dependencies form a cycle, e.g. when deleting multiple
    objects.
    """
31
32
class CollectedObjects(object):
    """
    A container that stores keys and lists of values along with remembering the
    parent objects for all the keys.

    This is used for the database object deletion routines so that we can
    calculate the 'leaf' objects which should be deleted first.

    Attributes:
    * data - maps each model to a SortedDict of {pk: object}.
    * children - maps a parent model to the list of models that were reached
      through it via a non-nullable relation. A model is only yielded by
      ordered_keys() once all of its children have been yielded.
    """

    def __init__(self):
        self.data = {}
        self.children = {}

    def add(self, model, pk, obj, parent_model, nullable=False):
        """
        Adds an item to the container.

        Arguments:
        * model - the class of the object being added.
        * pk - the primary key.
        * obj - the object itself.
        * parent_model - the model of the parent object that this object was
          reached through.
        * nullable - should be True if this relation is nullable.

        Returns True if the item already existed in the structure and
        False otherwise.
        """
        d = self.data.setdefault(model, SortedDict())
        retval = pk in d
        d[pk] = obj
        # Nullable relationships can be ignored -- they are nulled out before
        # deleting, and therefore do not affect the order in which objects
        # have to be deleted.
        if parent_model is not None and not nullable:
            self.children.setdefault(parent_model, []).append(model)
        return retval

    def __contains__(self, key):
        return key in self.data

    def __getitem__(self, key):
        return self.data[key]

    def __nonzero__(self):
        return bool(self.data)

    def iteritems(self):
        """
        Yields (model, {pk: object}) pairs in dependency order.
        """
        for k in self.ordered_keys():
            yield k, self[k]

    def items(self):
        """
        Returns the pairs produced by iteritems() as a list.
        """
        return list(self.iteritems())

    def keys(self):
        """
        Returns the models in dependency order (see ordered_keys()).
        """
        return self.ordered_keys()

    def ordered_keys(self):
        """
        Returns the models in the order that they should be dealt with (i.e.
        models with no dependencies first).

        Raises CyclicDependency if no such order exists.
        """
        models = self.data.keys()
        ordered = []
        done = set()
        while len(ordered) < len(models):
            found = False
            for model in models:
                if model in done:
                    continue
                # Read with get() rather than setdefault(): computing an
                # ordering must not add entries to self.children.
                blocked = False
                for child in self.children.get(model, ()):
                    if child not in done:
                        blocked = True
                        break
                if not blocked:
                    ordered.append(model)
                    done.add(model)
                    found = True
            if not found:
                # A full pass made no progress, so every remaining model is
                # waiting on another remaining model.
                raise CyclicDependency(
                    "There is a cyclic dependency of items to be processed.")

        return ordered

    def unordered_keys(self):
        """
        Fallback for the case where there is a cyclic dependency but we don't
        care.
        """
        return self.data.keys()
118
119
120 class QuerySet(object):
121     """
122     Represents a lazy database lookup for a set of objects.
123     """
124     def __init__(self, model=None, query=None):
125         self.model = model
126         self.query = query or sql.Query(self.model, connection)
127         self._result_cache = None
128         self._iter = None
129         self._sticky_filter = False
130
131     ########################
132     # PYTHON MAGIC METHODS #
133     ########################
134
135     def __getstate__(self):
136         """
137         Allows the QuerySet to be pickled.
138         """
139         # Force the cache to be fully populated.
140         len(self)
141
142         obj_dict = self.__dict__.copy()
143         obj_dict['_iter'] = None
144         return obj_dict
145
146     def __repr__(self):
147         data = list(self[:REPR_OUTPUT_SIZE + 1])
148         if len(data) > REPR_OUTPUT_SIZE:
149             data[-1] = "...(remaining elements truncated)..."
150         return repr(data)
151
152     def __len__(self):
153         # Since __len__ is called quite frequently (for example, as part of
154         # list(qs), we make some effort here to be as efficient as possible
155         # whilst not messing up any existing iterators against the QuerySet.
156         if self._result_cache is None:
157             if self._iter:
158                 self._result_cache = list(self._iter)
159             else:
160                 self._result_cache = list(self.iterator())
161         elif self._iter:
162             self._result_cache.extend(list(self._iter))
163         return len(self._result_cache)
164
165     def __iter__(self):
166         if self._result_cache is None:
167             self._iter = self.iterator()
168             self._result_cache = []
169         if self._iter:
170             return self._result_iter()
171         # Python's list iterator is better than our version when we're just
172         # iterating over the cache.
173         return iter(self._result_cache)
174
175     def _result_iter(self):
176         pos = 0
177         while 1:
178             upper = len(self._result_cache)
179             while pos < upper:
180                 yield self._result_cache[pos]
181                 pos = pos + 1
182             if not self._iter:
183                 raise StopIteration
184             if len(self._result_cache) <= pos:
185                 self._fill_cache()
186
187     def __nonzero__(self):
188         if self._result_cache is not None:
189             return bool(self._result_cache)
190         try:
191             iter(self).next()
192         except StopIteration:
193             return False
194         return True
195
196     def __getitem__(self, k):
197         """
198         Retrieves an item or slice from the set of results.
199         """
200         if not isinstance(k, (slice, int, long)):
201             raise TypeError
202         assert ((not isinstance(k, slice) and (k >= 0))
203                 or (isinstance(k, slice) and (k.start is None or k.start >= 0)
204                     and (k.stop is None or k.stop >= 0))), \
205                 "Negative indexing is not supported."
206
207         if self._result_cache is not None:
208             if self._iter is not None:
209                 # The result cache has only been partially populated, so we may
210                 # need to fill it out a bit more.
211                 if isinstance(k, slice):
212                     if k.stop is not None:
213                         # Some people insist on passing in strings here.
214                         bound = int(k.stop)
215                     else:
216                         bound = None
217                 else:
218                     bound = k + 1
219                 if len(self._result_cache) < bound:
220                     self._fill_cache(bound - len(self._result_cache))
221             return self._result_cache[k]
222
223         if isinstance(k, slice):
224             qs = self._clone()
225             if k.start is not None:
226                 start = int(k.start)
227             else:
228                 start = None
229             if k.stop is not None:
230                 stop = int(k.stop)
231             else:
232                 stop = None
233             qs.query.set_limits(start, stop)
234             return k.step and list(qs)[::k.step] or qs
235         try:
236             qs = self._clone()
237             qs.query.set_limits(k, k + 1)
238             return list(qs)[0]
239         except self.model.DoesNotExist, e:
240             raise IndexError, e.args
241
242     def __and__(self, other):
243         self._merge_sanity_check(other)
244         if isinstance(other, EmptyQuerySet):
245             return other._clone()
246         combined = self._clone()
247         combined.query.combine(other.query, sql.AND)
248         return combined
249
250     def __or__(self, other):
251         self._merge_sanity_check(other)
252         combined = self._clone()
253         if isinstance(other, EmptyQuerySet):
254             return combined
255         combined.query.combine(other.query, sql.OR)
256         return combined
257
258     ####################################
259     # METHODS THAT DO DATABASE QUERIES #
260     ####################################
261
262     def iterator(self):
263         """
264         An iterator over the results from applying this QuerySet to the
265         database.
266         """
267         fill_cache = self.query.select_related
268         if isinstance(fill_cache, dict):
269             requested = fill_cache
270         else:
271             requested = None
272         max_depth = self.query.max_depth
273         extra_select = self.query.extra_select.keys()
274         index_start = len(extra_select)
275         for row in self.query.results_iter():
276             if fill_cache:
277                 obj, _ = get_cached_row(self.model, row, index_start,
278                         max_depth, requested=requested)
279             else:
280                 obj = self.model(*row[index_start:])
281             for i, k in enumerate(extra_select):
282                 setattr(obj, k, row[i])
283             yield obj
284
285     def count(self):
286         """
287         Performs a SELECT COUNT() and returns the number of records as an
288         integer.
289
290         If the QuerySet is already fully cached this simply returns the length
291         of the cached results set to avoid multiple SELECT COUNT(*) calls.
292         """
293         if self._result_cache is not None and not self._iter:
294             return len(self._result_cache)
295
296         return self.query.get_count()
297
298     def get(self, *args, **kwargs):
299         """
300         Performs the query and returns a single object matching the given
301         keyword arguments.
302         """
303         clone = self.filter(*args, **kwargs)
304         num = len(clone)
305         if num == 1:
306             return clone._result_cache[0]
307         if not num:
308             raise self.model.DoesNotExist("%s matching query does not exist."
309                     % self.model._meta.object_name)
310         raise self.model.MultipleObjectsReturned("get() returned more than one %s -- it returned %s! Lookup parameters were %s"
311                 % (self.model._meta.object_name, num, kwargs))
312
313     def create(self, **kwargs):
314         """
315         Creates a new object with the given kwargs, saving it to the database
316         and returning the created object.
317         """
318         obj = self.model(**kwargs)
319         obj.save(force_insert=True)
320         return obj
321
    def get_or_create(self, **kwargs):
        """
        Looks up an object with the given kwargs, creating one if necessary.
        Returns a tuple of (object, created), where created is a boolean
        specifying whether an object was created.

        The optional 'defaults' keyword is removed from the lookup and only
        used when creating: the new object is built from the lookup kwargs
        that contain no '__' plus the defaults.

        If saving the new object raises IntegrityError, the lookup is tried
        once more; if it still finds nothing, the IntegrityError is raised.
        """
        assert kwargs, \
                'get_or_create() must be passed at least one keyword argument'
        defaults = kwargs.pop('defaults', {})
        try:
            return self.get(**kwargs), False
        except self.model.DoesNotExist:
            try:
                # Lookups such as name__iexact cannot be passed to the model
                # constructor, so only plain field names are kept.
                params = dict([(k, v) for k, v in kwargs.items() if '__' not in k])
                params.update(defaults)
                obj = self.model(**params)
                # The savepoint lets a failed insert be undone on its own.
                sid = transaction.savepoint()
                obj.save(force_insert=True)
                transaction.savepoint_commit(sid)
                return obj, True
            except IntegrityError, e:
                transaction.savepoint_rollback(sid)
                # NOTE(review): presumably covers another writer creating the
                # same row between our lookup and our insert -- confirm.
                try:
                    return self.get(**kwargs), False
                except self.model.DoesNotExist:
                    # Report the original insert failure, not the lookup miss.
                    raise e
348
349     def latest(self, field_name=None):
350         """
351         Returns the latest object, according to the model's 'get_latest_by'
352         option or optional given field_name.
353         """
354         latest_by = field_name or self.model._meta.get_latest_by
355         assert bool(latest_by), "latest() requires either a field_name parameter or 'get_latest_by' in the model"
356         assert self.query.can_filter(), \
357                 "Cannot change a query once a slice has been taken."
358         obj = self._clone()
359         obj.query.set_limits(high=1)
360         obj.query.add_ordering('-%s' % latest_by)
361         return obj.get()
362
363     def in_bulk(self, id_list):
364         """
365         Returns a dictionary mapping each of the given IDs to the object with
366         that ID.
367         """
368         assert self.query.can_filter(), \
369                 "Cannot use 'limit' or 'offset' with in_bulk"
370         assert isinstance(id_list, (tuple,  list)), \
371                 "in_bulk() must be provided with a list of IDs."
372         if not id_list:
373             return {}
374         qs = self._clone()
375         qs.query.add_filter(('pk__in', id_list))
376         return dict([(obj._get_pk_val(), obj) for obj in qs.iterator()])
377
378     def delete(self):
379         """
380         Deletes the records in the current QuerySet.
381         """
382         assert self.query.can_filter(), \
383                 "Cannot use 'limit' or 'offset' with delete."
384
385         del_query = self._clone()
386
387         # Disable non-supported fields.
388         del_query.query.select_related = False
389         del_query.query.clear_ordering()
390
391         # Delete objects in chunks to prevent the list of related objects from
392         # becoming too long.
393         while 1:
394             # Collect all the objects to be deleted in this chunk, and all the
395             # objects that are related to the objects that are to be deleted.
396             seen_objs = CollectedObjects()
397             for object in del_query[:CHUNK_SIZE]:
398                 object._collect_sub_objects(seen_objs)
399
400             if not seen_objs:
401                 break
402             delete_objects(seen_objs)
403
404         # Clear the result cache, in case this QuerySet gets reused.
405         self._result_cache = None
406     delete.alters_data = True
407
408     def update(self, **kwargs):
409         """
410         Updates all elements in the current QuerySet, setting all the given
411         fields to the appropriate values.
412         """
413         assert self.query.can_filter(), \
414                 "Cannot update a query once a slice has been taken."
415         query = self.query.clone(sql.UpdateQuery)
416         query.add_update_values(kwargs)
417         rows = query.execute_sql(None)
418         transaction.commit_unless_managed()
419         self._result_cache = None
420         return rows
421     update.alters_data = True
422
423     def _update(self, values):
424         """
425         A version of update that accepts field objects instead of field names.
426         Used primarily for model saving and not intended for use by general
427         code (it requires too much poking around at model internals to be
428         useful at that level).
429         """
430         assert self.query.can_filter(), \
431                 "Cannot update a query once a slice has been taken."
432         query = self.query.clone(sql.UpdateQuery)
433         query.add_update_fields(values)
434         self._result_cache = None
435         return query.execute_sql(None)
436     _update.alters_data = True
437
438     ##################################################
439     # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS #
440     ##################################################
441
442     def values(self, *fields):
443         return self._clone(klass=ValuesQuerySet, setup=True, _fields=fields)
444
445     def values_list(self, *fields, **kwargs):
446         flat = kwargs.pop('flat', False)
447         if kwargs:
448             raise TypeError('Unexpected keyword arguments to values_list: %s'
449                     % (kwargs.keys(),))
450         if flat and len(fields) > 1:
451             raise TypeError("'flat' is not valid when values_list is called with more than one field.")
452         return self._clone(klass=ValuesListQuerySet, setup=True, flat=flat,
453                 _fields=fields)
454
455     def dates(self, field_name, kind, order='ASC'):
456         """
457         Returns a list of datetime objects representing all available dates for
458         the given field_name, scoped to 'kind'.
459         """
460         assert kind in ("month", "year", "day"), \
461                 "'kind' must be one of 'year', 'month' or 'day'."
462         assert order in ('ASC', 'DESC'), \
463                 "'order' must be either 'ASC' or 'DESC'."
464         return self._clone(klass=DateQuerySet, setup=True,
465                 _field_name=field_name, _kind=kind, _order=order)
466
467     def none(self):
468         """
469         Returns an empty QuerySet.
470         """
471         return self._clone(klass=EmptyQuerySet)
472
473     ##################################################################
474     # PUBLIC METHODS THAT ALTER ATTRIBUTES AND RETURN A NEW QUERYSET #
475     ##################################################################
476
477     def all(self):
478         """
479         Returns a new QuerySet that is a copy of the current one. This allows a
480         QuerySet to proxy for a model manager in some cases.
481         """
482         return self._clone()
483
484     def filter(self, *args, **kwargs):
485         """
486         Returns a new QuerySet instance with the args ANDed to the existing
487         set.
488         """
489         return self._filter_or_exclude(False, *args, **kwargs)
490
491     def exclude(self, *args, **kwargs):
492         """
493         Returns a new QuerySet instance with NOT (args) ANDed to the existing
494         set.
495         """
496         return self._filter_or_exclude(True, *args, **kwargs)
497
498     def _filter_or_exclude(self, negate, *args, **kwargs):
499         if args or kwargs:
500             assert self.query.can_filter(), \
501                     "Cannot filter a query once a slice has been taken."
502
503         clone = self._clone()
504         if negate:
505             clone.query.add_q(~Q(*args, **kwargs))
506         else:
507             clone.query.add_q(Q(*args, **kwargs))
508         return clone
509
510     def complex_filter(self, filter_obj):
511         """
512         Returns a new QuerySet instance with filter_obj added to the filters.
513
514         filter_obj can be a Q object (or anything with an add_to_query()
515         method) or a dictionary of keyword lookup arguments.
516
517         This exists to support framework features such as 'limit_choices_to',
518         and usually it will be more natural to use other methods.
519         """
520         if isinstance(filter_obj, Q) or hasattr(filter_obj, 'add_to_query'):
521             clone = self._clone()
522             clone.query.add_q(filter_obj)
523             return clone
524         else:
525             return self._filter_or_exclude(None, **filter_obj)
526
527     def select_related(self, *fields, **kwargs):
528         """
529         Returns a new QuerySet instance that will select related objects.
530
531         If fields are specified, they must be ForeignKey fields and only those
532         related objects are included in the selection.
533         """
534         depth = kwargs.pop('depth', 0)
535         if kwargs:
536             raise TypeError('Unexpected keyword arguments to select_related: %s'
537                     % (kwargs.keys(),))
538         obj = self._clone()
539         if fields:
540             if depth:
541                 raise TypeError('Cannot pass both "depth" and fields to select_related()')
542             obj.query.add_select_related(fields)
543         else:
544             obj.query.select_related = True
545         if depth:
546             obj.query.max_depth = depth
547         return obj
548
549     def dup_select_related(self, other):
550         """
551         Copies the related selection status from the QuerySet 'other' to the
552         current QuerySet.
553         """
554         self.query.select_related = other.query.select_related
555
556     def order_by(self, *field_names):
557         """
558         Returns a new QuerySet instance with the ordering changed.
559  &nb