Ticket #16937: prefetch_1.3.1.diff

File prefetch_1.3.1.diff, 23.5 KB (added by snyderra@…, 12 years ago)

for 1.3.1 stable release

  • django/db/models/fields/related.py

    diff -rupN Django-1.3.1/django/db/models/fields/related.py Django-1.3.1_prefetch/django/db/models/fields/related.py
    old new class ForeignRelatedObjectsDescriptor(ob  
    422422                db = self._db or router.db_for_read(rel_model, instance=instance)
    423423                return superclass.get_query_set(self).using(db).filter(**(self.core_filters))
    424424
     425            def get_prefetch_query_set(self, instances):
     426                """
     427                Return a queryset that does the bulk lookup needed
     428                by prefetch_related functionality.
     429                """
     430                if not instances:
     431                    return self.model.objects.none()
     432
     433                db = self._db or router.db_for_read(self.model, instance=instances[0])
     434                query = {'%s__%s__in' % (rel_field.name, attname):
     435                             [getattr(obj, attname) for obj in instances]}
     436                return super(RelatedManager, self).get_query_set().using(db).filter(**query)
     437
     438            def select_matching_instances(self, obj, related_objects):
     439                field_val = getattr(obj, attname)
     440                other_attname = rel_field.get_attname()
     441                return [rel_obj for rel_obj in related_objects
     442                        if getattr(rel_obj, other_attname) == field_val]
     443
     444            def all(self):
     445                try:
     446                    return self.instance._prefetched_objects_cache[rel_field.related_query_name()]
     447                except (AttributeError, KeyError):
     448                    return super(RelatedManager, self).all()
     449
    425450            def add(self, *objs):
    426451                for obj in objs:
    427452                    if not isinstance(obj, self.model):
    def create_many_related_manager(supercla  
    476501    and adds behavior for many-to-many related objects."""
    477502    through = rel.through
    478503    class ManyRelatedManager(superclass):
    479         def __init__(self, model=None, core_filters=None, instance=None, symmetrical=None,
    480                 join_table=None, source_field_name=None, target_field_name=None,
    481                 reverse=False):
     504        def __init__(self, model=None, query_field_name=None, instance=None, symmetrical=None,
     505                      source_field_name=None, target_field_name=None, reverse=False,
     506                join_table=None, prefetch_cache_name=None):
    482507            super(ManyRelatedManager, self).__init__()
    483             self.core_filters = core_filters
     508            self.query_field_name = query_field_name
     509            self.core_filters = {'%s__pk' % query_field_name: instance._get_pk_val()}
    484510            self.model = model
    485511            self.symmetrical = symmetrical
    486512            self.instance = instance
    def create_many_related_manager(supercla  
    489515            self.through = through
    490516            self._pk_val = self.instance.pk
    491517            self.reverse = reverse
     518            self.prefetch_cache_name = prefetch_cache_name
    492519            if self._pk_val is None:
    493520                raise ValueError("%r instance needs to have a primary key value before a many-to-many relationship can be used." % instance.__class__.__name__)
    494521
    def create_many_related_manager(supercla  
    496523            db = self._db or router.db_for_read(self.instance.__class__, instance=self.instance)
    497524            return superclass.get_query_set(self).using(db)._next_is_sticky().filter(**(self.core_filters))
    498525
     526        def get_prefetch_query_set(self, instances):
     527            if not instances:
     528                return self.model.objects.none()
     529
     530            from django.db import connections
     531
     532            db = self._db or router.db_for_read(self.model, instance=instances[0])
     533            query = {'%s__pk__in' % self.query_field_name:
     534                         [obj._get_pk_val() for obj in instances]}
     535            qs = super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**query)
     536
     537            # M2M: need to annotate the query in order to get the PK of the
     538            # primary model that the secondary model was actually related to.
     539
     540            # We know that there will already be a join on the join table, so we
     541            # can just add the select.
     542            join_table = self.through._meta.db_table
     543            pk_col = "%s_id" % self.source_field_name
     544            connection = connections[db]
     545            qn = connection.ops.quote_name
     546            qs = qs.extra(select={'_prefetch_related_pk':
     547                                      '%s.%s' % (qn(join_table), qn(pk_col))})
     548            return qs
     549
     550        def select_matching_instances(self, obj, related_objects):
     551            pk_val = obj._get_pk_val()
     552            return [rel_obj for rel_obj in related_objects
     553                    if rel_obj._prefetch_related_pk == pk_val]
     554
     555        def all(self):
     556            try:
     557                return self.instance._prefetched_objects_cache[self.prefetch_cache_name]
     558            except (AttributeError, KeyError):
     559                return super(ManyRelatedManager, self).all()
     560
    499561        # If the ManyToMany relation has an intermediary model,
    500562        # the add and remove methods do not exist.
    501563        if rel.through._meta.auto_created:
    class ManyRelatedObjectsDescriptor(objec  
    671733
    672734        manager = RelatedManager(
    673735            model=rel_model,
    674             core_filters={'%s__pk' % self.related.field.name: instance._get_pk_val()},
     736            query_field_name=self.related.field.name,
     737            prefetch_cache_name=self.related.field.related_query_name(),
    675738            instance=instance,
    676739            symmetrical=False,
    677740            source_field_name=self.related.field.m2m_reverse_field_name(),
    class ReverseManyRelatedObjectsDescripto  
    723786
    724787        manager = RelatedManager(
    725788            model=rel_model,
    726             core_filters={'%s__pk' % self.field.related_query_name(): instance._get_pk_val()},
     789            query_field_name=self.field.related_query_name(),
     790            prefetch_cache_name=self.field.name,
    727791            instance=instance,
    728792            symmetrical=self.field.rel.symmetrical,
    729793            source_field_name=self.field.m2m_field_name(),
  • django/db/models/manager.py

    diff -rupN Django-1.3.1/django/db/models/manager.py Django-1.3.1_prefetch/django/db/models/manager.py
    old new class Manager(object):  
    167167    def select_related(self, *args, **kwargs):
    168168        return self.get_query_set().select_related(*args, **kwargs)
    169169
     170    def prefetch_related(self, *args, **kwargs):
     171        return self.get_query_set().prefetch_related(*args, **kwargs)
     172
    170173    def values(self, *args, **kwargs):
    171174        return self.get_query_set().values(*args, **kwargs)
    172175
  • django/db/models/query.py

    diff -rupN Django-1.3.1/django/db/models/query.py Django-1.3.1_prefetch/django/db/models/query.py
    old new class QuerySet(object):  
    3737        self._iter = None
    3838        self._sticky_filter = False
    3939        self._for_write = False
     40        self._prefetch_related = set()
     41        self._prefetch_done = False
    4042
    4143    ########################
    4244    # PYTHON MAGIC METHODS #
    class QuerySet(object):  
    8284                self._result_cache = list(self.iterator())
    8385        elif self._iter:
    8486            self._result_cache.extend(self._iter)
     87        if self._prefetch_related and not self._prefetch_done:
     88            self._prefetch_related_objects()
    8589        return len(self._result_cache)
    8690
    8791    def __iter__(self):
     92        if self._prefetch_related:
     93            # We need all the results in order to be able to do the prefetch
     94            # in one go. To minimize code duplication, we use the __len__
     95            # code path which also forces this, and also does the prefetch
     96            len(self)
     97
    8898        if self._result_cache is None:
    8999            self._iter = self.iterator()
    90100            self._result_cache = []
    class QuerySet(object):  
    107117                self._fill_cache()
    108118
    109119    def __nonzero__(self):
     120        if self._prefetch_related:
     121            # We need all the results in order to be able to do the prefetch
     122            # in one go. To minimize code duplication, we use the __len__
     123            # code path which also forces this, and also does the prefetch
     124            len(self)
     125
    110126        if self._result_cache is not None:
    111127            return bool(self._result_cache)
    112128        try:
    class QuerySet(object):  
    496512            return self.query.has_results(using=self.db)
    497513        return bool(self._result_cache)
    498514
     515    def _prefetch_related_objects(self):
     516        # This method can only be called once the result cache has been filled.
     517        prefetch_related_objects(self._result_cache, self._prefetch_related)
     518        self._prefetch_done = True
     519
    499520    ##################################################
    500521    # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS #
    501522    ##################################################
    class QuerySet(object):  
    607628            obj.query.max_depth = depth
    608629        return obj
    609630
     631    def prefetch_related(self, *fields):
     632        """
     633        Returns a new QuerySet instance that will prefetch Many-To-One
     634        and Many-To-Many related objects when the QuerySet is evaluated.
     635
     636        The fields specified must be attributes that return a RelatedManager of
     637        some kind when used on instances of the evaluated QuerySet.
     638
     639        These RelatedManagers will be modified so that their 'all()' method will
     640        return a QuerySet whose cache is already filled with objects that were
     641        looked up in a single batch, rather than one query per object in the
     642        current QuerySet.
     643
     644        When prefetch_related() is called more than once, the list of fields to
     645        prefetch is added to. Call prefetch_related() with no arguments to
     646        clear the list.
     647        """
     648        if fields:
     649            new_fields = self._prefetch_related.union(set(fields))
     650        else:
     651            new_fields = set()
     652        return self._clone(_prefetch_related=new_fields)
     653
    610654    def dup_select_related(self, other):
    611655        """
    612656        Copies the related selection status from the QuerySet 'other' to the
    class QuerySet(object):  
    756800            query.filter_is_sticky = True
    757801        c = klass(model=self.model, query=query, using=self._db)
    758802        c._for_write = self._for_write
     803        c._prefetch_related = self._prefetch_related
    759804        c.__dict__.update(kwargs)
    760805        if setup and hasattr(c, '_setup_query'):
    761806            c._setup_query()
    def insert_query(model, values, return_i  
    14341479    query = sql.InsertQuery(model)
    14351480    query.insert_values(values, raw_values)
    14361481    return query.get_compiler(using=using).execute_sql(return_id)
     1482
     1483
     1484def prefetch_related_objects(result_cache, fields):
     1485    """
     1486    Populates prefetched objects caches for a list of results
     1487    from a QuerySet
     1488    """
     1489    from django.db.models.sql.constants import LOOKUP_SEP
     1490
     1491    if len(result_cache) == 0:
     1492        return # nothing to do
     1493
     1494    model = result_cache[0].__class__
     1495
     1496    # We need to be able to dynamically add to the list of prefetch_related
     1497    # fields that we look up (see below).  So we need some book keeping to
     1498    # ensure we don't do duplicate work.
     1499    done_fields = set() # list of fields like foo__bar__baz
     1500    done_lookups = {}   # dictionary of things like 'foo__bar': [results]
     1501    fields = list(fields)
     1502
     1503    # We may expand fields, so need a loop that allows for that
     1504    i = 0
     1505    while i < len(fields):
     1506        # 'field' can span several relationships, and so represent multiple
     1507        # lookups.
     1508        field = fields[i]
     1509
     1510        if field in done_fields:
     1511            # We've done exactly this already, skip the whole thing
     1512            i += 1
     1513            continue
     1514        done_fields.add(field)
     1515
     1516        # Top level, the list of objects to decorate is the result cache
     1517        # from the primary QuerySet. It won't be for deeper levels.
     1518        obj_list = result_cache
     1519
     1520        attrs = field.split(LOOKUP_SEP)
     1521        for level, attr in enumerate(attrs):
     1522            # Prepare main instances
     1523            if len(obj_list) == 0:
     1524                break
     1525
     1526            good_objects = True
     1527            for obj in obj_list:
     1528                if not hasattr(obj, '_prefetched_objects_cache'):
     1529                    try:
     1530                        obj._prefetched_objects_cache = {}
     1531                    except AttributeError:
     1532                        # Must be in a QuerySet subclass that is not returning
     1533                        # Model instances, either in Django or 3rd
     1534                        # party. prefetch_related() doesn't make sense, so quit
     1535                        # now.
     1536                        good_objects = False
     1537                        break
     1538            if not good_objects:
     1539                break
     1540
     1541            # Descend down tree
     1542            try:
     1543                rel_obj = getattr(obj_list[0], attr)
     1544            except AttributeError:
     1545                raise AttributeError("Cannot find '%s' on %s object, '%s' is an invalid "
     1546                                     "parameter to prefetch_related()" %
     1547                                     (attr, obj_list[0].__class__.__name__, field))
     1548
     1549            can_prefetch = hasattr(rel_obj, 'get_prefetch_query_set')
     1550            if level == len(attrs) - 1 and not can_prefetch:
     1551                # Last one, this *must* resolve to a related manager.
     1552                raise ValueError("'%s' does not resolve to a supported 'many related"
     1553                                 " manager' for model %s - this is an invalid"
     1554                                 " parameter to prefetch_related()."
     1555                                 % (field, model.__name__))
     1556
     1557            if can_prefetch:
     1558                # Check we didn't do this already
     1559                lookup = LOOKUP_SEP.join(attrs[0:level+1])
     1560                if lookup in done_lookups:
     1561                    obj_list = done_lookups[lookup]
     1562                else:
     1563                    relmanager = rel_obj
     1564                    obj_list, additional_prf = _prefetch_one_level(obj_list, relmanager, attr)
     1565                    for f in additional_prf:
     1566                        new_prf = LOOKUP_SEP.join([lookup, f])
     1567                        fields.append(new_prf)
     1568                    done_lookups[lookup] = obj_list
     1569            else:
     1570                # Assume we've got some singly related object. We replace
     1571                # the current list of parent objects with that list.
     1572                obj_list = [getattr(obj, attr) for obj in obj_list]
     1573
     1574        i += 1
     1575
     1576
     1577def _prefetch_one_level(instances, relmanager, attname):
     1578    """
     1579    Runs prefetches on all instances using the manager relmanager,
     1580    assigning results to queryset against instance.attname.
     1581
     1582    The prefetched objects are returned, along with any additional
     1583    prefetches that must be done due to prefetch_related fields
     1584    found from default managers.
     1585    """
     1586    mainqs = relmanager.get_prefetch_query_set(instances)
     1587    # We have to handle the possibility that the default manager itself added
     1588    # prefetch_related fields to the QuerySet we just got back. We don't want to
     1589    # trigger the prefetch_related functionality by evaluating the query.
     1590    # Rather, we need to merge in the prefetch_related fields.
     1591    additional_prf = list(getattr(mainqs, '_prefetch_related', []))
     1592    if additional_prf:
     1593        mainqs = mainqs.prefetch_related()
     1594    all_related_objects = list(mainqs)
     1595    for obj in instances:
     1596        qs = getattr(obj, attname).all()
     1597        qs._result_cache = relmanager.select_matching_instances(obj, all_related_objects)
     1598        # We don't want the individual qs doing prefetch_related now, since we
     1599        # have merged this into the current work.
     1600        qs._prefetch_done = True
     1601        obj._prefetched_objects_cache[attname] = qs
     1602    return all_related_objects, additional_prf
  • docs/ref/models/querysets.txt

    diff -rupN Django-1.3.1/docs/ref/models/querysets.txt Django-1.3.1_prefetch/docs/ref/models/querysets.txt
    old new related object.  
    689689``OneToOneFields`` will not be traversed in the reverse direction if you
    690690are performing a depth-based ``select_related``.
    691691
     692prefetch_related
     693~~~~~~~~~~~~~~~~
     694
     695.. method:: prefetch_related(*fields)
     696
     697.. versionadded:: 1.4
     698
     699Returns a ``QuerySet`` that will automatically retrieve, in a single batch,
     700related many-to-many and many-to-one objects for the specified fields.
     701
     702This is similar to ``select_related`` for the 'many related objects' case, with
     703the following important differences:
     704
     705* ``prefetch_related`` causes a separate query to be issued for each set of
     706  related objects that you request. This query is done as soon as the QuerySet
     707  begins to be evaluated.
     708
     709  This is in contrast to ``select_related``, which modifies the original query
     710  with joins in order to get the related objects in the same query as the main
     711  objects.
     712
     713* It is 'single depth' only, and doesn't support join syntax.
     714
     715The fields that must be supplied to this method can be any attributes on the
     716model instances which represent related queries that return multiple
     717objects. This includes attributes representing the 'many' side of ``ForeignKey``
     718relationships and ``ManyToManyField`` attributes.
     719
     720For example, suppose you have these models::
     721
     722    class Topping(models.Model):
     723        name = models.CharField(max_length=30)
     724
     725    class Pizza(models.Model):
     726        name = models.CharField(max_length=50)
     727        toppings = models.ManyToManyField(Topping)
     728
     729        def __unicode__(self):
     730            return u"%s (%s)" % (self.name, u", ".join([topping.name
     731                                                        for topping in self.toppings.all()]))
     732
     733and run this code::
     734
     735    >>> Pizza.objects.all()
     736    [u"Hawaiian (ham, pineapple)", u"Seafood (prawns, smoked salmon)"...
     737
     738The problem with this code is that it will run a query on the Toppings table for
     739**every** item in the Pizza ``QuerySet``.  Using ``prefetch_related``, this can
     740be reduced to two::
     741
     742    >>> pizzas = Pizza.objects.all().prefetch_related('toppings')
     743
     744All the relevant toppings will be fetched in a single query, and used to make
     745``QuerySets`` that have a pre-filled cache of the relevant results. These
     746``QuerySets`` are then used in the ``self.toppings.all()`` calls.
     747
     748Please note that use of ``prefetch_related`` will mean that the additional
     749queries run will **always** be executed - even if you never use the related
     750objects - and it always fully populates the result cache on the primary
     751``QuerySet`` (which can sometimes be avoided in other cases).
     752
     753Remember that, as always with QuerySets, any subsequent chained methods will
     754ignore previously cached results, and retrieve data in a fresh database
     755query. So, if you write the following::
     756
     757    >>> pizzas = Pizza.objects.prefetch_related('toppings')
     758    >>> [list(pizza.toppings.filter(spicy=True)) for pizza in pizzas]
     759
     760...then the fact that ``pizza.toppings.all()`` has been prefetched will not help
     761you - in fact it hurts performance, since you have done a database query that
     762you haven't used. So use this feature with caution!
     763
     764Chaining ``prefetch_related`` calls will accumulate the fields that should have
     765this behaviour applied. To clear any ``prefetch_related`` behaviour, call the
     766method with no arguments.
     767
     768
    692769extra
    693770~~~~~
    694771
  • tests/modeltests/prefetch_related/models.py

    diff -rupN Django-1.3.1/tests/modeltests/prefetch_related/models.py Django-1.3.1_prefetch/tests/modeltests/prefetch_related/models.py
    old new  
     1from django.db import models
     2
     3
     4class Author(models.Model):
     5    name = models.CharField(max_length=50)
     6    first_book = models.ForeignKey('Book', related_name='first_time_authors')
     7
     8    def __unicode__(self):
     9        return self.name
     10
     11class Book(models.Model):
     12    title = models.CharField(max_length=255)
     13
     14    authors = models.ManyToManyField(Author, related_name='books')
     15
     16    def __unicode__(self):
     17        return self.title
  • tests/modeltests/prefetch_related/tests.py

    diff -rupN Django-1.3.1/tests/modeltests/prefetch_related/tests.py Django-1.3.1_prefetch/tests/modeltests/prefetch_related/tests.py
    old new  
     1from django.test import TestCase
     2
     3from models import Author, Book
     4
     5
     6class PrefetchRelatedTests(TestCase):
     7
     8    def setUp(self):
     9
     10        self.book1 = Book.objects.create(title="Poems")
     11        self.book2 = Book.objects.create(title="Jane Eyre")
     12        self.book3 = Book.objects.create(title="Wuthering Heights")
     13
     14        self.author1 = Author.objects.create(name="Charlotte",
     15                                             first_book=self.book1)
     16        self.author2 = Author.objects.create(name="Anne",
     17                                             first_book=self.book1)
     18        self.author3 = Author.objects.create(name="Emily",
     19                                             first_book=self.book1)
     20
     21        self.book1.authors.add(self.author1)
     22        self.book1.authors.add(self.author2)
     23        self.book1.authors.add(self.author3)
     24        self.book2.authors.add(self.author1)
     25        self.book3.authors.add(self.author3)
     26
     27    def test_m2m_forward(self):
     28        with self.assertNumQueries(2):
     29            lists = [list(b.authors.all()) for b in Book.objects.prefetch_related('authors')]
     30
     31        normal_lists = [list(b.authors.all()) for b in Book.objects.all()]
     32        self.assertEqual(lists, normal_lists)
     33
     34
     35    def test_m2m_reverse(self):
     36        with self.assertNumQueries(2):
     37            lists = [list(a.books.all()) for a in Author.objects.prefetch_related('books')]
     38
     39        normal_lists = [list(a.books.all()) for a in Author.objects.all()]
     40        self.assertEqual(lists, normal_lists)
     41
     42    def test_foreignkey_reverse(self):
     43        with self.assertNumQueries(2):
     44            lists = [list(b.first_time_authors.all())
     45                     for b in Book.objects.prefetch_related('first_time_authors')]
     46
     47        self.assertQuerysetEqual(self.book2.authors.all(), [u"<Author: Charlotte>"])
     48
     49    def test_survives_clone(self):
     50        with self.assertNumQueries(2):
     51            lists = [list(b.first_time_authors.all())
     52                     for b in Book.objects.prefetch_related('first_time_authors').exclude(id=1000)]
     53
     54    def test_len(self):
     55        with self.assertNumQueries(2):
     56            qs = Book.objects.prefetch_related('first_time_authors')
     57            length = len(qs)
     58            lists = [list(b.first_time_authors.all())
     59                     for b in qs]
     60
     61    def test_bool(self):
     62        with self.assertNumQueries(2):
     63            qs = Book.objects.prefetch_related('first_time_authors')
     64            x = bool(qs)
     65            lists = [list(b.first_time_authors.all())
     66                     for b in qs]
     67
     68    def test_clear(self):
     69        with self.assertNumQueries(4):
     70            with_prefetch = Author.objects.prefetch_related('books')
     71            without_prefetch = with_prefetch.prefetch_related()
     72            lists = [list(a.books.all()) for a in without_prefetch]
Back to Top