Django

Code

Changeset 7030

Show
Ignore:
Timestamp:
01/26/08 07:23:54 (5 months ago)
Author:
mtredinnick
Message:

queryset-refactor: Converted the queryset iterator to be a real iterator that
only populates the result cache on demand. For efficiency, we actually populate
the result cache 100 elements at a time rather than one at a time, but this is
still a real win when the result set contains, for example, 10,000 objects.

This also provides an efficient boolean (nonzero) test that doesn't use up
a lot of memory if you don't read all the results.

Refs #2430, #5987.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • django/branches/queryset-refactor/django/db/models/query.py

    r6899 r7030  
    2222# when deleting objects). 
    2323CHUNK_SIZE = 100 
     24ITER_CHUNK_SIZE = CHUNK_SIZE 
    2425 
    2526class _QuerySet(object): 
     
    2930        self.query = query or sql.Query(self.model, connection) 
    3031        self._result_cache = None 
     32        self._iter = None 
    3133 
    3234    ######################## 
     
    3537 
    3638    def __repr__(self): 
    37         return repr(self._get_data()) 
     39        return repr(list(iter(self))) 
    3840 
    3941    def __len__(self): 
    40         return len(self._get_data()) 
     42        return len(list(iter(self))) 
    4143 
    4244    def __iter__(self): 
    43         return iter(self._get_data()) 
     45        pos = 0 
     46        if self._result_cache is None: 
     47            self._iter = self.iterator() 
     48            self._result_cache = [] 
     49        while 1: 
     50            upper = len(self._result_cache) 
     51            while pos < upper: 
     52                yield self._result_cache[pos] 
     53                pos = pos + 1 
     54            if not self._iter: 
     55                raise StopIteration 
     56            if len(self._result_cache) <= pos: 
     57                self._fill_cache() 
     58 
     59    def __nonzero__(self): 
     60        if self._result_cache is None: 
     61            try: 
     62                iter(self).next() 
     63            except StopIteration: 
     64                return False 
     65        return True 
    4466 
    4567    def __getitem__(self, k): 
     
    5375 
    5476        if self._result_cache is not None: 
     77            if self._iter is not None: 
     78                # The result cache has only been partially populated, so we may 
     79                # need to fill it out a bit more. 
     80                if isinstance(k, slice): 
     81                    bound = k.stop 
     82                else: 
     83                    bound = k + 1 
     84                if len(self._result_cache) < bound: 
     85                    self._fill_cache(bound - len(self._result_cache)) 
    5586            return self._result_cache[k] 
    5687 
     
    376407        return c 
    377408 
    378     def _get_data(self): 
    379         if self._result_cache is None: 
    380             self._result_cache = list(self.iterator()) 
    381         return self._result_cache 
     409    def _fill_cache(self, num=None): 
     410        """ 
     411        Fills the result cache with 'num' more entries (or until the results 
     412        iterator is exhausted). 
     413        """ 
     414        if self._iter: 
     415            try: 
     416                for i in range(num or ITER_CHUNK_SIZE): 
     417                    self._result_cache.append(self._iter.next()) 
     418            except StopIteration: 
     419                self._iter = None 
    382420 
    383421# Use the backend's QuerySet class if it defines one. Otherwise, use _QuerySet. 
     
    395433        # QuerySet.clone() will also set up the _fields attribute with the 
    396434        # names of the model fields to select. 
     435 
     436    def __iter__(self): 
     437        return self.iterator() 
    397438 
    398439    def iterator(self): 
  • django/branches/queryset-refactor/tests/regressiontests/queries/models.py

    r6968 r7030  
    502502>>> len(Item.objects.dates('created', 'day')) 
    5035032 
     504 
     505Test that parallel iterators work. 
     506 
     507>>> qs = Tag.objects.all() 
     508>>> i1, i2 = iter(qs), iter(qs) 
     509>>> i1.next(), i1.next() 
     510(<Tag: t1>, <Tag: t2>) 
     511>>> i2.next(), i2.next(), i2.next() 
     512(<Tag: t1>, <Tag: t2>, <Tag: t3>) 
     513>>> i1.next() 
     514<Tag: t3> 
     515 
     516We can do slicing beyond what is currently in the result cache, too. 
     517 
     518# We need to mess with the implementation internals a bit here to decrease the 
     519# cache fill size so that we don't read all the results at once. 
     520>>> from django.db.models import query 
     521>>> query.ITER_CHUNK_SIZE = 2 
     522>>> qs = Tag.objects.all() 
     523 
     524# Fill the cache with the first chunk. 
     525>>> bool(qs) 
     526True 
     527>>> len(qs._result_cache) 
     5282 
     529 
     530# Query beyond the end of the cache and check that it is filled out as required. 
     531>>> qs[4] 
     532<Tag: t5> 
     533>>> len(qs._result_cache) 
     5345 
     535 
     536# But querying beyond the end of the result set will fail. 
     537>>> qs[100] 
     538Traceback (most recent call last): 
     539... 
     540IndexError: ... 
    504541"""} 
    505542