Ticket #17003: prefetch_singly_related_objs.4.diff

File prefetch_singly_related_objs.4.diff, 35.3 KB (added by Luke Plant, 13 years ago)

cleaned up, fixed some docs

  • django/contrib/contenttypes/generic.py

    diff -r 554e071b5c4a django/contrib/contenttypes/generic.py
    a b  
    22Classes allowing "generic" relations through ContentType and object-id fields.
    33"""
    44
     5from collections import defaultdict
    56from functools import partial
     7from operator import attrgetter
     8
    69from django.core.exceptions import ObjectDoesNotExist
    710from django.db import connection
    811from django.db.models import signals
     
    5962            # This should never happen. I love comments like this, don't you?
    6063            raise Exception("Impossible arguments to GFK.get_content_type!")
    6164
     65    def get_prefetch_query_set(self, instances):
     66        # For efficiency, group the instances by content type and then do one
     67        # query per model
     68        fk_dict = defaultdict(list)
     69        # We need one instance for each group in order to get the right db:
     70        instance_dict = {}
     71        ct_attname = self.model._meta.get_field(self.ct_field).get_attname()
     72        for instance in instances:
     73            # We avoid looking for values if either ct_id or fkey value is None
     74            ct_id = getattr(instance, ct_attname)
     75            if ct_id is not None:
     76                fk_val = getattr(instance, self.fk_field)
     77                if fk_val is not None:
     78                    fk_dict[ct_id].append(fk_val)
     79                    instance_dict[ct_id] = instance
     80
     81        ret_val = []
     82        for ct_id, fkeys in fk_dict.items():
     83            instance = instance_dict[ct_id]
     84            ct = self.get_content_type(id=ct_id, using=instance._state.db)
     85            ret_val.extend(ct.get_all_objects_for_this_type(pk__in=fkeys))
     86
     87        # For doing the join in Python, we have to match both the FK val and the
     88        # content type, so the 'attr' vals we return need to be callables that
     89        # will return a (fk, class) pair.
     90        def gfk_key(obj):
     91            ct_id = getattr(obj, ct_attname)
     92            if ct_id is None:
     93                return None
     94            else:
     95                return (getattr(obj, self.fk_field),
     96                        self.get_content_type(id=ct_id,
     97                                              using=obj._state.db).model_class())
     98
     99        return (ret_val,
     100                lambda obj: (obj._get_pk_val(), obj.__class__),
     101                gfk_key,
     102                True,
     103                self.cache_attr)
     104
     105    def is_cached(self, instance):
     106        return hasattr(instance, self.cache_attr)
     107
    62108    def __get__(self, instance, instance_type=None):
    63109        if instance is None:
    64110            return self
     
    282328                    [obj._get_pk_val() for obj in instances]
    283329                }
    284330            qs = super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**query)
    285             return (qs, self.object_id_field_name, 'pk')
     331            return (qs,
     332                    attrgetter(self.object_id_field_name),
     333                    lambda obj: obj._get_pk_val(),
     334                    False,
     335                    self.prefetch_cache_name)
    286336
    287337        def add(self, *objs):
    288338            for obj in objs:
  • django/contrib/contenttypes/models.py

    diff -r 554e071b5c4a django/contrib/contenttypes/models.py
    a b  
    113113        """
    114114        return self.model_class()._base_manager.using(self._state.db).get(**kwargs)
    115115
     116    def get_all_objects_for_this_type(self, **kwargs):
     117        """
     118        Returns all objects of this type for the keyword arguments given.
     119        """
     120        return self.model_class()._base_manager.using(self._state.db).filter(**kwargs)
     121
    116122    def natural_key(self):
    117123        return (self.app_label, self.model)
  • django/db/models/fields/related.py

    diff -r 554e071b5c4a django/db/models/fields/related.py
    a b  
     1from operator import attrgetter
     2
    13from django.db import connection, router
    24from django.db.backends import util
    35from django.db.models import signals, get_model
     
    227229        self.related = related
    228230        self.cache_name = related.get_cache_name()
    229231
     232    def is_cached(self, instance):
     233        return hasattr(instance, self.cache_name)
     234
     235    def get_query_set(self, **db_hints):
     236        db = router.db_for_read(self.related.model, **db_hints)
     237        return self.related.model._base_manager.using(db)
     238
     239    def get_prefetch_query_set(self, instances):
     240        vals = [instance._get_pk_val() for instance in instances]
     241        params = {'%s__pk__in' % self.related.field.name: vals}
     242        return (self.get_query_set(),
     243                attrgetter(self.related.field.attname),
     244                lambda obj: obj._get_pk_val(),
     245                True,
     246                self.cache_name)
     247
    230248    def __get__(self, instance, instance_type=None):
    231249        if instance is None:
    232250            return self
     
    234252            return getattr(instance, self.cache_name)
    235253        except AttributeError:
    236254            params = {'%s__pk' % self.related.field.name: instance._get_pk_val()}
    237             db = router.db_for_read(self.related.model, instance=instance)
    238             rel_obj = self.related.model._base_manager.using(db).get(**params)
     255            rel_obj = self.get_query_set(instance=instance).get(**params)
    239256            setattr(instance, self.cache_name, rel_obj)
    240257            return rel_obj
    241258
     
    283300    # ReverseSingleRelatedObjectDescriptor instance.
    284301    def __init__(self, field_with_rel):
    285302        self.field = field_with_rel
     303        self.cache_name = self.field.get_cache_name()
     304
     305    def is_cached(self, instance):
     306        return hasattr(instance, self.cache_name)
     307
     308    def get_query_set(self, **db_hints):
     309        db = router.db_for_read(self.field.rel.to, **db_hints)
     310        rel_mgr = self.field.rel.to._default_manager
     311        # If the related manager indicates that it should be used for
     312        # related fields, respect that.
     313        if getattr(rel_mgr, 'use_for_related_fields', False):
     314            return rel_mgr.using(db)
     315        else:
     316            return QuerySet(self.field.rel.to).using(db)
     317
     318    def get_prefetch_query_set(self, instances):
     319        vals = [getattr(instance, self.field.attname) for instance in instances]
     320        other_field = self.field.rel.get_related_field()
     321        if other_field.rel:
     322            params = {'%s__pk__in' % self.field.rel.field_name: vals}
     323        else:
     324            params = {'%s__in' % self.field.rel.field_name: vals}
     325        return (self.get_query_set().filter(**params),
     326                attrgetter(self.field.rel.field_name),
     327                attrgetter(self.field.attname),
     328                True,
     329                self.cache_name)
    286330
    287331    def __get__(self, instance, instance_type=None):
    288332        if instance is None:
    289333            return self
    290334
    291         cache_name = self.field.get_cache_name()
    292335        try:
    293             return getattr(instance, cache_name)
     336            return getattr(instance, self.cache_name)
    294337        except AttributeError:
    295338            val = getattr(instance, self.field.attname)
    296339            if val is None:
     
    303346                params = {'%s__pk' % self.field.rel.field_name: val}
    304347            else:
    305348                params = {'%s__exact' % self.field.rel.field_name: val}
    306 
    307             # If the related manager indicates that it should be used for
    308             # related fields, respect that.
    309             rel_mgr = self.field.rel.to._default_manager
    310             db = router.db_for_read(self.field.rel.to, instance=instance)
    311             if getattr(rel_mgr, 'use_for_related_fields', False):
    312                 rel_obj = rel_mgr.using(db).get(**params)
    313             else:
    314                 rel_obj = QuerySet(self.field.rel.to).using(db).get(**params)
    315             setattr(instance, cache_name, rel_obj)
     349            qs = self.get_query_set(instance=instance)
     350            rel_obj = qs.get(**params)
     351            setattr(instance, self.cache_name, rel_obj)
    316352            return rel_obj
    317353
    318354    def __set__(self, instance, value):
     
    425461                    return super(RelatedManager, self).get_query_set().using(db).filter(**self.core_filters)
    426462
    427463            def get_prefetch_query_set(self, instances):
    428                 """
    429                 Return a queryset that does the bulk lookup needed
    430                 by prefetch_related functionality.
    431                 """
    432464                db = self._db or router.db_for_read(self.model)
    433465                query = {'%s__%s__in' % (rel_field.name, attname):
    434466                             [getattr(obj, attname) for obj in instances]}
    435467                qs = super(RelatedManager, self).get_query_set().using(db).filter(**query)
    436                 return (qs, rel_field.get_attname(), attname)
     468                return (qs,
     469                        attrgetter(rel_field.get_attname()),
     470                        attrgetter(attname),
     471                        False,
     472                        rel_field.related_query_name())
    437473
    438474            def add(self, *objs):
    439475                for obj in objs:
     
    507543                return super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**self.core_filters)
    508544
    509545        def get_prefetch_query_set(self, instances):
    510             """
    511             Returns a tuple:
    512             (queryset of instances of self.model that are related to passed in instances
    513              attr of returned instances needed for matching
    514              attr of passed in instances needed for matching)
    515             """
    516546            from django.db import connections
    517547            db = self._db or router.db_for_read(self.model)
    518548            query = {'%s__pk__in' % self.query_field_name:
     
    534564            qs = qs.extra(select={'_prefetch_related_val':
    535565                                      '%s.%s' % (qn(join_table), qn(source_col))})
    536566            select_attname = fk.rel.get_related_field().get_attname()
    537             return (qs, '_prefetch_related_val', select_attname)
     567            return (qs,
     568                    attrgetter('_prefetch_related_val'),
     569                    attrgetter(select_attname),
     570                    False,
     571                    self.prefetch_cache_name)
    538572
    539573        # If the ManyToMany relation has an intermediary model,
    540574        # the add and remove methods do not exist.
  • django/db/models/query.py

    diff -r 554e071b5c4a django/db/models/query.py
    a b  
    16121612                break
    16131613
    16141614            # Descend down tree
    1615             try:
    1616                 rel_obj = getattr(obj_list[0], attr)
    1617             except AttributeError:
     1615
     1616            # We assume that objects retrieved are homogenous (which is the premise
     1617            # of prefetch_related), so what applies to first object applies to all.
     1618            first_obj = obj_list[0]
     1619            prefetcher, attr_found, is_fetched = get_prefetcher(first_obj, attr)
     1620
     1621            if not attr_found:
    16181622                raise AttributeError("Cannot find '%s' on %s object, '%s' is an invalid "
    16191623                                     "parameter to prefetch_related()" %
    1620                                      (attr, obj_list[0].__class__.__name__, lookup))
     1624                                     (attr, first_obj.__class__.__name__, lookup))
    16211625
    1622             can_prefetch = hasattr(rel_obj, 'get_prefetch_query_set')
    1623             if level == len(attrs) - 1 and not can_prefetch:
    1624                 # Last one, this *must* resolve to a related manager.
    1625                 raise ValueError("'%s' does not resolve to a supported 'many related"
    1626                                  " manager' for model %s - this is an invalid"
    1627                                  " parameter to prefetch_related()."
    1628                                  % (lookup, model.__name__))
     1626            if level == len(attrs) - 1 and prefetcher is None:
     1627                # Last one, this *must* resolve to something that supports
     1628                # prefetching, otherwise there is no point adding it and the
     1629                # developer asking for it has made a mistake.
     1630                raise ValueError("'%s' does not resolve to a item that supports "
     1631                                 "prefetching - this is an invalid parameter to "
     1632                                 "prefetch_related()." % lookup)
    16291633
    1630             if can_prefetch:
     1634            if prefetcher is not None and not is_fetched:
    16311635                # Check we didn't do this already
    16321636                current_lookup = LOOKUP_SEP.join(attrs[0:level+1])
    16331637                if current_lookup in done_queries:
    16341638                    obj_list = done_queries[current_lookup]
    16351639                else:
    1636                     relmanager = rel_obj
    1637                     obj_list, additional_prl = prefetch_one_level(obj_list, relmanager, attr)
     1640                    obj_list, additional_prl = prefetch_one_level(obj_list, prefetcher, attr)
    16381641                    for f in additional_prl:
    16391642                        new_prl = LOOKUP_SEP.join([current_lookup, f])
    16401643                        related_lookups.append(new_prl)
    16411644                    done_queries[current_lookup] = obj_list
    16421645            else:
    1643                 # Assume we've got some singly related object. We replace
    1644                 # the current list of parent objects with that list.
     1646                # Either a singly related object that has already been fetched
     1647                # (e.g. via select_related), or hopefully some other property
     1648                # that doesn't support prefetching but needs to be traversed.
     1649
     1650                # We replace the current list of parent objects with that list.
    16451651                obj_list = [getattr(obj, attr) for obj in obj_list]
    16461652
    16471653                # Filter out 'None' so that we can continue with nullable
     
    16491655                obj_list = [obj for obj in obj_list if obj is not None]
    16501656
    16511657
    1652 def prefetch_one_level(instances, relmanager, attname):
     1658def get_prefetcher(instance, attr):
     1659    """
     1660    For the attribute 'attr' on the given instance, finds
     1661    an object that has a get_prefetch_query_set().
     1662    Return a 3 tuple containing:
     1663    (the object with get_prefetch_query_set (or None),
     1664     a boolean that is False if the attribute was not found at all,
     1665     a boolean that is True if the attribute has already been fetched)
     1666    """
     1667    prefetcher = None
     1668    attr_found = False
     1669    is_fetched = False
     1670
     1671    # For singly related objects, we have to avoid getting the attribute
     1672    # from the object, as this will trigger the query. So we first try
     1673    # on the class, in order to get the descriptor object.
     1674    rel_obj_descriptor = getattr(instance.__class__, attr, None)
     1675    if rel_obj_descriptor is None:
     1676        try:
     1677            rel_obj = getattr(instance, attr)
     1678            attr_found = True
     1679        except AttributeError:
     1680            pass
     1681    else:
     1682        attr_found = True
     1683        if rel_obj_descriptor:
     1684            # singly related object, descriptor object has the
     1685            # get_prefetch_query_set() method.
     1686            if hasattr(rel_obj_descriptor, 'get_prefetch_query_set'):
     1687                prefetcher = rel_obj_descriptor
     1688                if rel_obj_descriptor.is_cached(instance):
     1689                    is_fetched = True
     1690            else:
     1691                # descriptor doesn't support prefetching, so we go ahead and get
     1692                # the attribute on the instance rather than the class to
     1693                # support many related managers
     1694                rel_obj = getattr(instance, attr)
     1695                if hasattr(rel_obj, 'get_prefetch_query_set'):
     1696                    prefetcher = rel_obj
     1697    return prefetcher, attr_found, is_fetched
     1698
     1699
     1700def prefetch_one_level(instances, prefetcher, attname):
    16531701    """
    16541702    Helper function for prefetch_related_objects
    16551703
    1656     Runs prefetches on all instances using the manager relmanager,
    1657     assigning results to queryset against instance.attname.
     1704    Runs prefetches on all instances using the prefetcher object,
     1705    assigning results to relevant caches in instance.
    16581706
    16591707    The prefetched objects are returned, along with any additional
    16601708    prefetches that must be done due to prefetch_related lookups
    16611709    found from default managers.
    16621710    """
    1663     rel_qs, rel_obj_attr, instance_attr = relmanager.get_prefetch_query_set(instances)
     1711    # prefetcher must have a method get_prefetch_query_set() which takes a list
     1712    # of instances, and returns a tuple:
     1713
     1714    # (queryset of instances of self.model that are related to passed in instances,
     1715    #  callable that gets value to be matched for returned instances,
     1716    #  callable that gets value to be matched for passed in instances,
     1717    #  boolean that is True for singly related objects,
     1718    #  cache name to assign to).
     1719
     1720    # The 'values to be matched' must be hashable as they will be used
     1721    # in a dictionary.
     1722
     1723    rel_qs, rel_obj_attr, instance_attr, single, cache_name =\
     1724        prefetcher.get_prefetch_query_set(instances)
    16641725    # We have to handle the possibility that the default manager itself added
    16651726    # prefetch_related lookups to the QuerySet we just got back. We don't want to
    16661727    # trigger the prefetch_related functionality by evaluating the query.
     
    16761737
    16771738    rel_obj_cache = {}
    16781739    for rel_obj in all_related_objects:
    1679         rel_attr_val = getattr(rel_obj, rel_obj_attr)
     1740        rel_attr_val = rel_obj_attr(rel_obj)
    16801741        if rel_attr_val not in rel_obj_cache:
    16811742            rel_obj_cache[rel_attr_val] = []
    16821743        rel_obj_cache[rel_attr_val].append(rel_obj)
    16831744
    16841745    for obj in instances:
    1685         qs = getattr(obj, attname).all()
    1686         instance_attr_val = getattr(obj, instance_attr)
    1687         qs._result_cache = rel_obj_cache.get(instance_attr_val, [])
    1688         # We don't want the individual qs doing prefetch_related now, since we
    1689         # have merged this into the current work.
    1690         qs._prefetch_done = True
    1691         obj._prefetched_objects_cache[attname] = qs
     1746        instance_attr_val = instance_attr(obj)
     1747        vals = rel_obj_cache.get(instance_attr_val, [])
     1748        if single:
     1749            # Need to assign to single cache on instance
     1750            if vals:
     1751                setattr(obj, cache_name, vals[0])
     1752        else:
     1753            # Multi, attribute represents a manager with an .all() method that
     1754            # returns a QuerySet
     1755            qs = getattr(obj, attname).all()
     1756            qs._result_cache = vals
     1757            # We don't want the individual qs doing prefetch_related now, since we
     1758            # have merged this into the current work.
     1759            qs._prefetch_done = True
     1760            obj._prefetched_objects_cache[cache_name] = qs
    16921761    return all_related_objects, additional_prl
  • docs/ref/models/querysets.txt

    diff -r 554e071b5c4a docs/ref/models/querysets.txt
    a b  
    696696.. versionadded:: 1.4
    697697
    698698Returns a ``QuerySet`` that will automatically retrieve, in a single batch,
    699 related many-to-many and many-to-one objects for each of the specified lookups.
    700 
    701 This is similar to ``select_related`` for the 'many related objects' case, but
    702 note that ``prefetch_related`` causes a separate query to be issued for each set
    703 of related objects that you request, unlike ``select_related`` which modifies
    704 the original query with joins in order to get the related objects. With
    705 ``prefetch_related``, the additional queries are done as soon as the QuerySet
    706 begins to be evaluated.
     699related objects for each of the specified lookups.
     700
     701This has a similar purpose to ``select_related``, in that both are designed to
     702stop the deluge of database queries that is caused by accessing related objects,
     703but the strategy is quite different.
     704
     705``select_related`` works by creating a SQL join and including the fields of the
     706related object in the SELECT statement. For this reason, ``select_related`` gets
     707the related objects in the same database query. However, to avoid the much
     708larger result set that would result from joining across a 'many' relationship,
     709``select_related`` is limited to single-valued relationships - foreign key and
     710one-to-one.
     711
     712``prefetch_related``, on the other hand, does a separate lookup for each
     713relationship, and does the 'joining' in Python. This allows it to prefetch
     714many-to-many and many-to-one objects, which cannot be done using
     715``select_related``, in addition to the foreign key and one-to-one relationships
     716that are supported by ``select_related``. It also supports prefetching of
     717:class:`~django.contrib.contenttypes.generic.GenericRelation` and
     718:class:`~django.contrib.contenttypes.generic.GenericForeignKey`.
    707719
    708720For example, suppose you have these models::
    709721
     
    733745``QuerySets`` that have a pre-filled cache of the relevant results. These
    734746``QuerySets`` are then used in the ``self.toppings.all()`` calls.
    735747
    736 Please note that use of ``prefetch_related`` will mean that the additional
    737 queries run will **always** be executed - even if you never use the related
    738 objects - and it always fully populates the result cache on the primary
    739 ``QuerySet`` (which can sometimes be avoided in other cases).
     748The additional queries are executed after the QuerySet has begun to be evaluated
     749and the primary query has been executed. Note that the result cache of the
     750primary QuerySet and all specified related objects will then be fully loaded
     751into memory, which is often avoided in other cases - even after a query has been
     752executed in the database, QuerySet normally tries to make use of chunking
     753from the database to avoid loading all objects into memory before you need
     754them.
    740755
    741756Also remember that, as always with QuerySets, any subsequent chained methods
    742 will ignore previously cached results, and retrieve data using a fresh database
    743 query. So, if you write the following:
     757which imply a different database query will ignore previously cached results,
     758and retrieve data using a fresh database query. So, if you write the following:
    744759
    745760    >>> pizzas = Pizza.objects.prefetch_related('toppings')
    746761    >>> [list(pizza.toppings.filter(spicy=True)) for pizza in pizzas]
     
    749764you - in fact it hurts performance, since you have done a database query that
    750765you haven't used. So use this feature with caution!
    751766
    752 The lookups that must be supplied to this method can be any attributes on the
    753 model instances which represent related queries that return multiple
    754 objects. This includes attributes representing the 'many' side of ``ForeignKey``
    755 relationships, forward and reverse ``ManyToManyField`` attributes, and also any
    756 ``GenericRelations``.
    757 
    758767You can also use the normal join syntax to do related fields of related
    759768fields. Suppose we have an additional model to the example above::
    760769
     
    770779belonging to those pizzas. This will result in a total of 3 database queries -
    771780one for the restaurants, one for the pizzas, and one for the toppings.
    772781
     782    >>> Restaurant.objects.prefetch_related('best_pizza__toppings')
     783
     784This will fetch the best pizza and all the toppings for the best pizza for each
     785restaurant. This will be done in 3 database queries - one for the restaurants,
     786one for the 'best pizzas', and one for the toppings.
     787
     788Of course, the ``best_pizza`` relationship could also be fetched using
     789``select_related`` to reduce the query count to 2:
     790
    773791    >>> Restaurant.objects.select_related('best_pizza').prefetch_related('best_pizza__toppings')
    774792
    775 This will fetch the best pizza and all the toppings for the best pizza for each
    776 restaurant. This will be done in 2 database queries - one for the restaurants
    777 and 'best pizzas' combined (achieved through use of ``select_related``), and one
    778 for the toppings.
    779 
    780 Chaining ``prefetch_related`` calls will accumulate the fields that should have
    781 this behavior applied. To clear any ``prefetch_related`` behavior, pass `None`
    782 as a parameter::
     793Since the prefetch is executed after the main query (which includes the joins
     794needed by ``select_related``), it is able to detect that the ``best_pizza``
     795objects have already been fetched, and it will skip fetching them again.
     796
     797Chaining ``prefetch_related`` calls will accumulate the lookups that are
     798prefetched. To clear any ``prefetch_related`` behavior, pass ``None`` as a
     799parameter::
    783800
    784801   >>> non_prefetched = qs.prefetch_related(None)
    785802
    786 One difference when using ``prefetch_related`` is that, in some circumstances,
    787 objects created by a query can be shared between the different objects that they
    788 are related to i.e. a single Python model instance can appear at more than one
    789 point in the tree of objects that are returned. Normally this behavior will not
    790 be a problem, and will in fact save both memory and CPU time.
     803One difference to note when using ``prefetch_related`` is that objects created
     804by a query can be shared between the different objects that they are related to
     805i.e. a single Python model instance can appear at more than one point in the
     806tree of objects that are returned. This will normally happen with foreign key
     807relationships. Typically this behavior will not be a problem, and will in fact
     808save both memory and CPU time.
     809
     810While ``prefetch_related`` supports prefetching ``GenericForeignKey``
     811relationships, the number of queries will depend on the data. Since a
     812``GenericForeignKey`` can reference data in multiple tables, one query per table
     813referenced is needed, rather than one query for all the items. There could be
     814additional queries on the ``ContentType`` table if the relevant rows have not
     815already been fetched.
    791816
    792817extra
    793818~~~~~
  • docs/releases/1.4.txt

    diff -r 554e071b5c4a docs/releases/1.4.txt
    a b  
    6666``QuerySet.prefetch_related``
    6767~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    6868
    69 Analagous to :meth:`~django.db.models.query.QuerySet.select_related` but for
    70 many-to-many relationships,
     69Similar to :meth:`~django.db.models.query.QuerySet.select_related` but with a
     70different strategy and broader scope,
    7171:meth:`~django.db.models.query.QuerySet.prefetch_related` has been added to
    72 :class:`~django.db.models.query.QuerySet`. This method returns a new ``QuerySet``
    73 that will prefetch in a single batch each of the specified related lookups as
    74 soon as it begins to be evaluated (e.g. by iterating over it). This enables you
    75 to fix many instances of a very common performance problem, in which your code
    76 ends up doing O(n) database queries (or worse) if objects on your primary
    77 ``QuerySet`` each have many related objects that you also need.
     72:class:`~django.db.models.query.QuerySet`. This method returns a new
     73``QuerySet`` that will prefetch in a single batch each of the specified related
     74lookups as soon as it begins to be evaluated. Unlike ``select_related``, it does
     75the joins in Python, not in the database, and supports many-to-many
     76relationships, :class:`~django.contrib.contenttypes.generic.GenericForeignKey`
     77and more. This enables you to fix many instances of a very common performance
     78problem, in which your code ends up doing O(n) database queries (or worse) if
     79objects on your primary ``QuerySet`` each have many related objects that you
     80also need.
    7881
    7982HTML5
    8083~~~~~
  • tests/modeltests/prefetch_related/models.py

    diff -r 554e071b5c4a tests/modeltests/prefetch_related/models.py
    a b  
    104104        ordering = ['id']
    105105
    106106
    107 ## Generic relation tests
     107## GenericRelation/GenericForeignKey tests
    108108
    109109class TaggedItem(models.Model):
    110110    tag = models.SlugField()
    111111    content_type = models.ForeignKey(ContentType, related_name="taggeditem_set2")
    112112    object_id = models.PositiveIntegerField()
    113113    content_object = generic.GenericForeignKey('content_type', 'object_id')
     114    created_by_ct = models.ForeignKey(ContentType, null=True)
     115    created_by_fkey = models.PositiveIntegerField(null=True)
     116    created_by = generic.GenericForeignKey('created_by_ct', 'created_by_fkey')
    114117
    115118    def __unicode__(self):
    116119        return self.tag
  • tests/modeltests/prefetch_related/tests.py

    diff -r 554e071b5c4a tests/modeltests/prefetch_related/tests.py
    a b  
    5454        normal_lists = [list(a.books.all()) for a in Author.objects.all()]
    5555        self.assertEqual(lists, normal_lists)
    5656
     57    def test_foreignkey_forward(self):
     58        with self.assertNumQueries(2):
     59            books = [a.first_book for a in Author.objects.prefetch_related('first_book')]
     60
     61        normal_books = [a.first_book for a in Author.objects.all()]
     62        self.assertEqual(books, normal_books)
     63
    5764    def test_foreignkey_reverse(self):
    5865        with self.assertNumQueries(2):
    5966            lists = [list(b.first_time_authors.all())
     
    175182        self.assertTrue('prefetch_related' in str(cm.exception))
    176183
    177184    def test_invalid_final_lookup(self):
    178         qs = Book.objects.prefetch_related('authors__first_book')
     185        qs = Book.objects.prefetch_related('authors__name')
    179186        with self.assertRaises(ValueError) as cm:
    180187            list(qs)
    181188
    182189        self.assertTrue('prefetch_related' in str(cm.exception))
    183         self.assertTrue("first_book" in str(cm.exception))
     190        self.assertTrue("name" in str(cm.exception))
    184191
    185192
    186193class DefaultManagerTests(TestCase):
     
    222229
    223230class GenericRelationTests(TestCase):
    224231
    225     def test_traverse_GFK(self):
    226         """
    227         Test that we can traverse a 'content_object' with prefetch_related()
    228         """
    229         # In fact, there is no special support for this in prefetch_related code
    230         # - we can traverse any object that will lead us to objects that have
    231         # related managers.
    232 
     232    def setUp(self):
    233233        book1 = Book.objects.create(title="Winnie the Pooh")
    234234        book2 = Book.objects.create(title="Do you like green eggs and spam?")
     235        book3 = Book.objects.create(title="Three Men In A Boat")
    235236
    236237        reader1 = Reader.objects.create(name="me")
    237238        reader2 = Reader.objects.create(name="you")
     239        reader3 = Reader.objects.create(name="someone")
    238240
    239         book1.read_by.add(reader1)
     241        book1.read_by.add(reader1, reader2)
    240242        book2.read_by.add(reader2)
     243        book3.read_by.add(reader3)
    241244
    242         TaggedItem.objects.create(tag="awesome", content_object=book1)
    243         TaggedItem.objects.create(tag="awesome", content_object=book2)
     245        self.book1, self.book2, self.book3 = book1, book2, book3
     246        self.reader1, self.reader2, self.reader3 = reader1, reader2, reader3
     247
     248    def test_prefetch_GFK(self):
     249        TaggedItem.objects.create(tag="awesome", content_object=self.book1)
     250        TaggedItem.objects.create(tag="great", content_object=self.reader1)
     251        TaggedItem.objects.create(tag="stupid", content_object=self.book2)
     252        TaggedItem.objects.create(tag="amazing", content_object=self.reader3)
     253
     254        # 1 for TaggedItem table, 1 for Book table, 1 for Reader table
     255        with self.assertNumQueries(3):
     256            qs = TaggedItem.objects.prefetch_related('content_object')
     257            list(qs)
     258
     259    def test_traverse_GFK(self):
     260        """
     261        Test that we can traverse a 'content_object' with prefetch_related() and
     262        get to related objects on the other side (assuming the queryset is
     263        suitably filtered to a single content type)
     264        """
     265        TaggedItem.objects.create(tag="awesome", content_object=self.book1)
     266        TaggedItem.objects.create(tag="awesome", content_object=self.book2)
     267        TaggedItem.objects.create(tag="awesome", content_object=self.book3)
     268        TaggedItem.objects.create(tag="awesome", content_object=self.reader1)
     269        TaggedItem.objects.create(tag="awesome", content_object=self.reader2)
    244270
    245271        ct = ContentType.objects.get_for_model(Book)
    246272
    247         # We get 4 queries - 1 for main query, 2 for each access to
    248         # 'content_object' because these can't be handled by select_related, and
    249         # 1 for the 'read_by' relation.
    250         with self.assertNumQueries(4):
     273        # We get 3 queries - 1 for main query, 1 for content_objects since they
     274        # all use the same table, and 1 for the 'read_by' relation.
     275        with self.assertNumQueries(3):
    251276            # If we limit to books, we know that they will have 'read_by'
    252277            # attributes, so the following makes sense:
    253             qs = TaggedItem.objects.select_related('content_type').prefetch_related('content_object__read_by').filter(tag='awesome').filter(content_type=ct, tag='awesome')
    254             readers_of_awesome_books = [r.name for tag in qs
    255                                         for r in tag.content_object.read_by.all()]
    256             self.assertEqual(readers_of_awesome_books, ["me", "you"])
     278            qs = TaggedItem.objects.filter(content_type=ct, tag='awesome').prefetch_related('content_object__read_by')
     279            readers_of_awesome_books = set([r.name for tag in qs
     280                                            for r in tag.content_object.read_by.all()])
     281            self.assertEqual(readers_of_awesome_books, set(["me", "you", "someone"]))
    257282
     283    def test_nullable_GFK(self):
     284        TaggedItem.objects.create(tag="awesome", content_object=self.book1,
     285                                  created_by=self.reader1)
     286        TaggedItem.objects.create(tag="great", content_object=self.book2)
     287        TaggedItem.objects.create(tag="rubbish", content_object=self.book3)
     288
     289        with self.assertNumQueries(2):
     290            result = [t.created_by for t in TaggedItem.objects.prefetch_related('created_by')]
     291
     292        self.assertEqual(result,
     293                         [t.created_by for t in TaggedItem.objects.all()])
    258294
    259295    def test_generic_relation(self):
    260296        b = Bookmark.objects.create(url='http://www.djangoproject.com/')
     
    311347        self.assertEquals(lst, lst2)
    312348
    313349    def test_parent_link_prefetch(self):
    314         with self.assertRaises(ValueError) as cm:
    315             qs = list(AuthorWithAge.objects.prefetch_related('author'))
    316         self.assertTrue('prefetch_related' in str(cm.exception))
     350        with self.assertNumQueries(2):
     351            [a.author for a in AuthorWithAge.objects.prefetch_related('author')]
     352
     353    def test_child_link_prefetch(self):
     354        with self.assertNumQueries(2):
     355            l = [a.authorwithage for a in Author.objects.prefetch_related('authorwithage')]
     356
     357        self.assertEqual(l, [a.authorwithage for a in Author.objects.all()])
    317358
    318359
    319360class ForeignKeyToFieldTest(TestCase):
     
    406447        worker2 = Employee.objects.create(name="Angela", boss=boss)
    407448
    408449    def test_traverse_nullable(self):
     450        # Because we use select_related() for 'boss', it doesn't need to be
     451        # prefetched, but we can still traverse it even though some values are null
    409452        with self.assertNumQueries(2):
    410453            qs = Employee.objects.select_related('boss').prefetch_related('boss__serfs')
    411454            co_serfs = [list(e.boss.serfs.all()) if e.boss is not None else []
     
    416459                        for e in qs2]
    417460
    418461        self.assertEqual(co_serfs, co_serfs2)
     462
     463    def test_prefetch_nullable(self):
     464        # One for main employee, one for boss, one for serfs
     465        with self.assertNumQueries(3):
     466            qs = Employee.objects.prefetch_related('boss__serfs')
     467            co_serfs = [list(e.boss.serfs.all()) if e.boss is not None else []
     468                        for e in qs]
     469
     470        qs2 =  Employee.objects.all()
     471        co_serfs2 =  [list(e.boss.serfs.all()) if e.boss is not None else []
     472                        for e in qs2]
     473
     474        self.assertEqual(co_serfs, co_serfs2)
Back to Top