Ticket #16937: prefetch_2.diff

File prefetch_2.diff, 18.6 KB (added by Luke Plant, 13 years ago)

Reworked patch

  • django/db/models/fields/related.py

    diff --git a/django/db/models/fields/related.py b/django/db/models/fields/related.py
    a b  
    435435                db = self._db or router.db_for_read(self.model, instance=self.instance)
    436436                return super(RelatedManager, self).get_query_set().using(db).filter(**(self.core_filters))
    437437
     438            def get_prefetch_query_set(self, instances):
     439                """
     440                Return a queryset that does the bulk lookup needed
     441                by prefetch_related functionality.
     442                """
     443                if not instances:
     444                    return self.model.objects.none()
     445
     446                db = self._db or router.db_for_read(self.model, instance=instances[0])
     447                query = {'%s__%s__in' % (rel_field.name, attname):
     448                             [getattr(obj, attname) for obj in instances]}
     449                return super(RelatedManager, self).get_query_set().using(db).filter(**query)
     450
     451            def select_matching_instances(self, obj, related_objects):
     452                field_val = getattr(obj, attname)
     453                other_attname = rel_field.get_attname()
     454                return [rel_obj for rel_obj in related_objects
     455                        if getattr(rel_obj, other_attname) == field_val]
     456
     457            def all(self):
     458                try:
     459                    return self.instance._prefetched_objects_cache[rel_field.related_query_name()]
     460                except (AttributeError, KeyError):
     461                    return super(RelatedManager, self).all()
     462
    438463            def add(self, *objs):
    439464                for obj in objs:
    440465                    if not isinstance(obj, self.model):
     
    482507    """Creates a manager that subclasses 'superclass' (which is a Manager)
    483508    and adds behavior for many-to-many related objects."""
    484509    class ManyRelatedManager(superclass):
    485         def __init__(self, model=None, core_filters=None, instance=None, symmetrical=None,
     510        def __init__(self, model=None, query_field_name=None, instance=None, symmetrical=None,
    486511                     source_field_name=None, target_field_name=None, reverse=False,
    487                      through=None):
     512                     through=None, prefetch_cache_name=None):
    488513            super(ManyRelatedManager, self).__init__()
    489514            self.model = model
    490             self.core_filters = core_filters
     515            self.query_field_name = query_field_name
     516            self.core_filters = {'%s__pk' % query_field_name: instance._get_pk_val()}
    491517            self.instance = instance
    492518            self.symmetrical = symmetrical
    493519            self.source_field_name = source_field_name
    494520            self.target_field_name = target_field_name
    495521            self.reverse = reverse
    496522            self.through = through
     523            self.prefetch_cache_name = prefetch_cache_name
    497524            self._pk_val = self.instance.pk
    498525            if self._pk_val is None:
    499526                raise ValueError("%r instance needs to have a primary key value before a many-to-many relationship can be used." % instance.__class__.__name__)
     
    502529            db = self._db or router.db_for_read(self.instance.__class__, instance=self.instance)
    503530            return super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**(self.core_filters))
    504531
     532        def get_prefetch_query_set(self, instances):
     533            if not instances:
     534                return self.model.objects.none()
     535
     536            from django.db import connections
     537
     538            db = self._db or router.db_for_read(self.model, instance=instances[0])
     539            query = {'%s__pk__in' % self.query_field_name:
     540                         [obj._get_pk_val() for obj in instances]}
     541            qs = super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**query)
     542
     543            # M2M: need to annotate the query in order to get the PK of the
     544            # primary model that the secondary model was actually related to.
     545
     546            # We know that there will already be a join on the join table, so we
     547            # can just add the select.
     548            join_table = self.through._meta.db_table
     549            pk_col = "%s_id" % self.source_field_name
     550            connection = connections[db]
     551            qn = connection.ops.quote_name
     552            qs = qs.extra(select={'_prefetch_related_pk':
     553                                      '%s.%s' % (qn(join_table), qn(pk_col))})
     554            return qs
     555
     556        def select_matching_instances(self, obj, related_objects):
     557            pk_val = obj._get_pk_val()
     558            return [rel_obj for rel_obj in related_objects
     559                    if rel_obj._prefetch_related_pk == pk_val]
     560
     561        def all(self):
     562            try:
     563                return self.instance._prefetched_objects_cache[self.prefetch_cache_name]
     564            except (AttributeError, KeyError):
     565                return super(ManyRelatedManager, self).all()
     566
    505567        # If the ManyToMany relation has an intermediary model,
    506568        # the add and remove methods do not exist.
    507569        if rel.through._meta.auto_created:
     
    683745
    684746        manager = self.related_manager_cls(
    685747            model=rel_model,
    686             core_filters={'%s__pk' % self.related.field.name: instance._get_pk_val()},
     748            query_field_name=self.related.field.name,
     749            prefetch_cache_name=self.related.field.related_query_name(),
    687750            instance=instance,
    688751            symmetrical=False,
    689752            source_field_name=self.related.field.m2m_reverse_field_name(),
     
    739802
    740803        manager = self.related_manager_cls(
    741804            model=self.field.rel.to,
    742             core_filters={'%s__pk' % self.field.related_query_name(): instance._get_pk_val()},
     805            query_field_name=self.field.related_query_name(),
     806            prefetch_cache_name=self.field.name,
    743807            instance=instance,
    744808            symmetrical=self.field.rel.symmetrical,
    745809            source_field_name=self.field.m2m_field_name(),
  • django/db/models/manager.py

    diff --git a/django/db/models/manager.py b/django/db/models/manager.py
    a b  
    172172    def select_related(self, *args, **kwargs):
    173173        return self.get_query_set().select_related(*args, **kwargs)
    174174
     175    def prefetch_related(self, *args, **kwargs):
     176        return self.get_query_set().prefetch_related(*args, **kwargs)
     177
    175178    def values(self, *args, **kwargs):
    176179        return self.get_query_set().values(*args, **kwargs)
    177180
  • django/db/models/query.py

    diff --git a/django/db/models/query.py b/django/db/models/query.py
    a b  
    3636        self._iter = None
    3737        self._sticky_filter = False
    3838        self._for_write = False
     39        self._prefetch_related = set()
     40        self._prefetch_done = False
    3941
    4042    ########################
    4143    # PYTHON MAGIC METHODS #
     
    8183                self._result_cache = list(self.iterator())
    8284        elif self._iter:
    8385            self._result_cache.extend(self._iter)
     86        if self._prefetch_related and not self._prefetch_done:
     87            self._prefetch_related_objects()
    8488        return len(self._result_cache)
    8589
    8690    def __iter__(self):
     91        if self._prefetch_related:
     92            # We need all the results in order to be able to do the prefetch
     93            # in one go. To minimize code duplication, we use the __len__
     94            # code path which also forces this, and also does the prefetch
     95            len(self)
     96
    8797        if self._result_cache is None:
    8898            self._iter = self.iterator()
    8999            self._result_cache = []
     
    106116                self._fill_cache()
    107117
    108118    def __nonzero__(self):
     119        if self._prefetch_related:
     120            # We need all the results in order to be able to do the prefetch
     121            # in one go. To minimize code duplication, we use the __len__
     122            # code path which also forces this, and also does the prefetch
     123            len(self)
     124
    109125        if self._result_cache is not None:
    110126            return bool(self._result_cache)
    111127        try:
     
    526542            return self.query.has_results(using=self.db)
    527543        return bool(self._result_cache)
    528544
     545    def _prefetch_related_objects(self):
     546        # This method can only be called once the result cache has been filled.
     547        prefetch_related_objects(self._result_cache, self._prefetch_related)
     548        self._prefetch_done = True
     549
    529550    ##################################################
    530551    # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS #
    531552    ##################################################
     
    649670            obj.query.max_depth = depth
    650671        return obj
    651672
     673    def prefetch_related(self, *fields):
     674        """
     675        Returns a new QuerySet instance that will prefetch Many-To-One
     676        and Many-To-Many related objects when the QuerySet is evaluated.
     677
     678        The fields specified must be attributes that return a RelatedManager of
     679        some kind when used on instances of the evaluated QuerySet.
     680
     681        These RelatedManagers will be modified so that their 'all()' method will
     682        return a QuerySet whose cache is already filled with objects that were
     683        looked up in a single batch, rather than one query per object in the
     684        current QuerySet.
     685
     686        When prefetch_related() is called more than once, the list of fields to
     687        prefetch is added to. Call prefetch_related() with no arguments to
     688        clear the list.
     689        """
     690        if fields:
     691            new_fields = self._prefetch_related.union(set(fields))
     692        else:
     693            new_fields = set()
     694        return self._clone(_prefetch_related=new_fields)
     695
    652696    def dup_select_related(self, other):
    653697        """
    654698        Copies the related selection status from the QuerySet 'other' to the
     
    798842            query.filter_is_sticky = True
    799843        c = klass(model=self.model, query=query, using=self._db)
    800844        c._for_write = self._for_write
     845        c._prefetch_related = self._prefetch_related
    801846        c.__dict__.update(kwargs)
    802847        if setup and hasattr(c, '_setup_query'):
    803848            c._setup_query()
     
    14841529    query = sql.InsertQuery(model)
    14851530    query.insert_values(fields, objs, raw=raw)
    14861531    return query.get_compiler(using=using).execute_sql(return_id)
     1532
     1533
     1534def prefetch_related_objects(result_cache, fields):
     1535    """
     1536    Populates prefetched objects caches for a list of results
     1537    from a QuerySet
     1538    """
     1539    obj = None
     1540    for obj in result_cache:
     1541        if not hasattr(obj, '_prefetched_objects_cache'):
     1542            try:
     1543                obj._prefetched_objects_cache = {}
     1544            except AttributeError:
     1545                # Must be in a QuerySet subclass that is not getting Model
     1546                # instances, either in Django or 3rd party. prefetch_related
     1547                # doesn't make sense, so quit now.
     1548                return
     1549
     1550    if obj is None:
     1551        return # nothing to do
     1552
     1553    for attname in fields:
     1554        # We get one related manager, from the first object, and re-use this for
     1555        # all operations. We use only APIs that ignore the relmanager.instance
     1556        # attribute.
     1557        relmanager = getattr(obj, attname)
     1558
     1559        all_related_objects = list(relmanager.get_prefetch_query_set(result_cache))
     1560        for obj in result_cache:
     1561            qs = getattr(obj, attname).all()
     1562            qs._result_cache = relmanager.select_matching_instances(obj, all_related_objects)
     1563            obj._prefetched_objects_cache[attname] = qs
  • docs/ref/models/querysets.txt

    diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
    a b  
    690690A :class:`~django.db.models.OneToOneField` is not traversed in the reverse
    691691direction if you are performing a depth-based ``select_related()`` call.
    692692
     693prefetch_related
     694~~~~~~~~~~~~~~~~
     695
     696.. method:: prefetch_related(*fields)
     697
     698.. versionadded:: 1.4
     699
     700Returns a ``QuerySet`` that will automatically retrieve, in a single batch,
     701related many-to-many and many-to-one objects for the specified fields.
     702
     703This is similar to ``select_related`` for the 'many related objects' case, with
     704the following important differences:
     705
     706* ``prefetch_related`` causes a separate query to be issued for each set of
     707  related objects that you request. This query is done as soon as the QuerySet
     708  begins to be evaluated.
     709
     710  This is in contrast to ``select_related``, which modifies the original query
     711  with joins in order to get the related objects in the same query as the main
     712  objects.
     713
     714* It is 'single depth' only, and doesn't support join syntax.
     715
     716The fields that must be supplied to this method can be any attributes on the
     717model instances which represent related queries that return multiple
     718objects. This includes attributes representing the 'many' side of ``ForeignKey``
     719relationships and ``ManyToManyField`` attributes.
     720
     721For example, suppose you have these models::
     722
     723    class Topping(models.Model):
     724        name = models.CharField(max_length=30)
     725
     726    class Pizza(models.Model):
     727        name = models.CharField(max_length=50)
     728        toppings = models.ManyToManyField(Topping)
     729
     730        def __unicode__(self):
     731            return u"%s (%s)" % (self.name, u", ".join([topping.name
     732                                                        for topping in self.toppings.all()]))
     733
     734and run this code::
     735
     736    >>> Pizza.objects.all()
     737    [u"Hawaiian (ham, pineapple)", u"Seafood (prawns, smoked salmon)"...
     738
     739The problem with this code is that it will run a query on the Toppings table for
     740**every** item in the Pizza ``QuerySet``.  Using ``prefetch_related``, this can
     741be reduced to two:
     742
     743    >>> pizzas = Pizza.objects.all().prefetch_related('toppings')
     744
     745All the relevant toppings will be fetched in a single query, and used to make
     746``QuerySets`` that have a pre-filled cache of the relevant results. These
     747``QuerySets`` are then used in the ``self.toppings.all()`` calls.
     748
     749Please note that use of ``prefetch_related`` will mean that the additional
     750queries run will **always** be executed - even if you never use the related
     751objects - and it always fully populates the result cache on the primary
     752``QuerySet`` (which can sometimes be avoided in other cases).
     753
     754Remember that, as always with QuerySets, any subsequent chained methods will
     755ignore previously cached results, and retrieve data in a fresh database
     756query. So, if you write the following:
     757
     758    >>> pizzas = Pizza.objects.prefetch_related('toppings')
     759    >>> [list(pizza.toppings.filter(spicy=True)) for pizza in pizzas]
     760
     761...then the fact that ``pizza.toppings.all()`` has been prefetched will not help
     762you - in fact it hurts performance, since you have done a database query that
     763you haven't used. So use this feature with caution!
     764
     765Chaining ``prefetch_related`` calls will accumulate the fields that should have
     766this behaviour applied. To clear any ``prefetch_related`` behaviour, call the
     767method with no arguments.
     768
     769
    693770extra
    694771~~~~~
    695772
  • new file tests/modeltests/prefetch_related/models.py

    diff --git a/tests/modeltests/prefetch_related/__init__.py b/tests/modeltests/prefetch_related/__init__.py
    new file mode 100644
    diff --git a/tests/modeltests/prefetch_related/models.py b/tests/modeltests/prefetch_related/models.py
    new file mode 100644
    - +  
     1from django.db import models
     2
     3
     4class Author(models.Model):
     5    name = models.CharField(max_length=50)
     6    first_book = models.ForeignKey('Book', related_name='first_time_authors')
     7
     8    def __unicode__(self):
     9        return self.name
     10
     11class Book(models.Model):
     12    title = models.CharField(max_length=255)
     13
     14    authors = models.ManyToManyField(Author, related_name='books')
     15
     16    def __unicode__(self):
     17        return self.title
  • new file tests/modeltests/prefetch_related/tests.py

    diff --git a/tests/modeltests/prefetch_related/tests.py b/tests/modeltests/prefetch_related/tests.py
    new file mode 100644
    - +  
     1from django.test import TestCase
     2
     3from models import Author, Book
     4
     5
     6class PrefetchRelatedTests(TestCase):
     7
     8    def setUp(self):
     9
     10        self.book1 = Book.objects.create(title="Poems")
     11        self.book2 = Book.objects.create(title="Jane Eyre")
     12        self.book3 = Book.objects.create(title="Wuthering Heights")
     13
     14        self.author1 = Author.objects.create(name="Charlotte",
     15                                             first_book=self.book1)
     16        self.author2 = Author.objects.create(name="Anne",
     17                                             first_book=self.book1)
     18        self.author3 = Author.objects.create(name="Emily",
     19                                             first_book=self.book1)
     20
     21        self.book1.authors.add(self.author1)
     22        self.book1.authors.add(self.author2)
     23        self.book1.authors.add(self.author3)
     24        self.book2.authors.add(self.author1)
     25        self.book3.authors.add(self.author3)
     26
     27    def test_m2m_forward(self):
     28        with self.assertNumQueries(2):
     29            lists = [list(b.authors.all()) for b in Book.objects.prefetch_related('authors')]
     30
     31        normal_lists = [list(b.authors.all()) for b in Book.objects.all()]
     32        self.assertEqual(lists, normal_lists)
     33
     34
     35    def test_m2m_reverse(self):
     36        with self.assertNumQueries(2):
     37            lists = [list(a.books.all()) for a in Author.objects.prefetch_related('books')]
     38
     39        normal_lists = [list(a.books.all()) for a in Author.objects.all()]
     40        self.assertEqual(lists, normal_lists)
     41
     42    def test_foreignkey_reverse(self):
     43        with self.assertNumQueries(2):
     44            lists = [list(b.first_time_authors.all())
     45                     for b in Book.objects.prefetch_related('first_time_authors')]
     46
     47        self.assertQuerysetEqual(self.book2.authors.all(), [u"<Author: Charlotte>"])
     48
     49    def test_survives_clone(self):
     50        with self.assertNumQueries(2):
     51            lists = [list(b.first_time_authors.all())
     52                     for b in Book.objects.prefetch_related('first_time_authors').exclude(id=1000)]
     53
     54    def test_len(self):
     55        with self.assertNumQueries(2):
     56            qs = Book.objects.prefetch_related('first_time_authors')
     57            length = len(qs)
     58            lists = [list(b.first_time_authors.all())
     59                     for b in qs]
     60
     61    def test_bool(self):
     62        with self.assertNumQueries(2):
     63            qs = Book.objects.prefetch_related('first_time_authors')
     64            x = bool(qs)
     65            lists = [list(b.first_time_authors.all())
     66                     for b in qs]
     67
     68    def test_clear(self):
     69        with self.assertNumQueries(4):
     70            with_prefetch = Author.objects.prefetch_related('books')
     71            without_prefetch = with_prefetch.prefetch_related()
     72            lists = [list(a.books.all()) for a in without_prefetch]
Back to Top