id summary reporter owner description type status component version severity resolution keywords cc stage has_patch needs_docs needs_tests needs_better_patch easy ui_ux 16554 Unnecessary join when using a reverse foreign-key relationship in separate filter or aggregate calls Ben Davis nobody "Django allows you to perform queries across reverse foreign key relationships. If, however, you need to access that same relationship in more than one filter or aggregate call, the ORM creates unnecessary joins. == Example == app/models.py: {{{#!python class Author(models.Model): first_name = models.CharField(max_length=30) last_name = models.CharField(max_length=30) email = models.EmailField() class Book(models.Model): author = models.ForeignKey(Author, related_name='books') title = models.CharField(max_length=100) genre = models.CharField(max_length=20, choices=( ('SCIFI', 'SciFi'), ('FANTASY', 'Fantasy'), ('NONFICTION', 'NonFiction') )) published = models.DateField() pages = models.IntegerField() }}} This particular query will only perform one join on app_books: {{{#!python qs = Author.objects.filter(books__genre='SCIFI', books__pages__gt=500) print qs.query }}} {{{#!sql SELECT ""app_author"".""id"", ""app_author"".""first_name"", ""app_author"".""last_name"", ""app_author"".""email"" FROM ""app_author"" INNER JOIN ""app_book"" ON (""app_author"".""id"" = ""app_book"".""author_id"") WHERE (""app_book"".""genre"" = 'SCIFI' AND ""app_book"".""pages"" > 500 ) }}} However, if you separate the filter() call into two separate calls, you get this: {{{#!python qs = Author.objects.filter(books__genre='SCIFI') qs = qs.filter(books__pages__gt=500) print qs.query }}} {{{#!sql SELECT ""app_author"".""id"", ""app_author"".""first_name"", ""app_author"".""last_name"", ""app_author"".""email"" FROM ""app_author"" INNER JOIN ""app_book"" ON (""app_author"".""id"" = ""app_book"".""author_id"") INNER JOIN ""app_book"" T3 ON (""app_author"".""id"" = T3.""author_id"") WHERE (""app_book"".""genre"" = 'SCIFI' AND T3.""pages"" > 500 ) }}} As you can see, simply separating out the filters into separate calls creates a new, completely unecessary join. This also occurs if you need to do an aggregate: {{{#!python qs = Author.objects.annotate(pages_written=Sum('books__pages')) qs = qs.filter(books__genre='SCIFI') print qs.query }}} {{{#!sql SELECT ""app_author"".""id"", ""app_author"".""first_name"", ""app_author"".""last_name"", ""app_author"".""email"", SUM(""app_book"".""pages"") AS ""pages_written"" FROM ""app_author"" LEFT OUTER JOIN ""app_book"" ON (""app_author"".""id"" = ""app_book"".""author_id"") INNER JOIN ""app_book"" T3 ON (""app_author"".""id"" = T3.""author_id"") WHERE T3.""genre"" = 'SCIFI' GROUP BY ""app_author"".""id"", ""app_author"".""first_name"", ""app_author"".""last_name"", ""app_author"".""email"" }}} In this particular case, I imagine the ORM is seeing the need for two joins because one is outer and one is inner, however, I would argue that if an outer join already occurs as a result of an aggregate, it should always use the existing join." Bug closed Database layer (models, ORM) dev Normal invalid Ben Davis Unreviewed 0 0 0 0 0 0