﻿id	summary	reporter	owner	description	type	status	component	version	severity	resolution	keywords	cc	stage	has_patch	needs_docs	needs_tests	needs_better_patch	easy	ui_ux
30158	Subquery expressions unnecessarily added to group by	Jonny Fuller	Simon Charette	"Hi friends,

My Django/SQL skills are not good enough to properly isolate the problem independently of my use case detailed below. I believe the problem is Subqueries being forced in to the group_by clause because they are select expressions. Per the below if I remove the subqueries manually from sql group_by, my query runs perfectly. I tried to manually edit the qs.query.group_by, but because it is created by compiler.get_group_by() I cannot fix the ORM query group by clause.

Are Subquery expressions always supposed to be included in group_by? If this is desired behavior is it possible to toggle it off so the ORM can produce the accurate query?

{{{
""""""
Problem Statement: The individual annotations work fine when run independently, but when chained the query takes 5 minutes. This is due to the final group by clause unexpectedly receiving the Subquery as extra fields.
""""""
​
# relevant models and querysets
class ClaimQuerySet(models.QuerySet):
    def annotate_all(self):
        """"""Adds ``results``, ``latest_note_text``, and ``latest_assessment_text`` to the queryset.""""""
        return self.annotate_latest_results().annotate_most_recent_note().annotate_most_recent_assessment()
​
    def prefetch_all(self, annotate_sum=True):
        return self.prefetch_notes().prefetch_latest_results(annotate_sum)
​
    def prefetch_latest_results(self, annotate_sum: bool=True):
        """"""Prefetches the most result :class:`RulesEngineResult` object and optionally
        preload its :attr:`RulesEngineResult.score`.
​
        Args:
            annotate_sum:
        """"""
        latest_runs = self.latest_runs
        if annotate_sum:
            latest_runs = latest_runs.annotate(_score=Sum('results__value'))
        return self.prefetch_related(Prefetch(
            'rules_engine_results', queryset=latest_runs, to_attr='_last_run')
        )
​
    def prefetch_notes(self):
        """"""Prefetches all related notes and assessments.""""""
        return self.prefetch_related('notes', 'assessments')
​
    @property
    def latest_runs(self):
        """"""Shortcut for :attr:`RulesEngineResultQuerySet.get_latest_runs`""""""
        return RulesEngineResult.objects.get_latest_runs()
​
    def annotate_latest_results(self) -> 'ClaimQuerySet':
        """"""Annotates the queryset with a new field ``results`` whose value is the Sum
        of the last :attr:`RulesEngineResult.results` for the claim.
        """"""
        # Only Sum on runs in the above set.
        filter_q = Q(rules_engine_results__in=self.latest_runs)
        # noinspection PyTypeChecker
        return self.annotate(results=Sum('rules_engine_results__results__value', filter=filter_q))
​
    def annotate_most_recent_note(self) -> 'ClaimQuerySet':
        """"""Annotates the queryset with a field ``latest_note_text`` whose value is the last
        entered :attr:`Note.text` for the claim or ``None`` if there are no associated notes.
        """"""
        return self._annotate_most_recent_basenote(Note, 'latest_note_text')
​
    def annotate_most_recent_assessment(self) -> 'ClaimQuerySet':
        """"""Annotates the queryset with a field ``latest_assessment_text`` whose value is the last
        entered :attr:`Assessment.text` for the claim or ``None`` if there are no associated assessments.
        """"""
        return self._annotate_most_recent_basenote(Assessment, 'latest_assessment_text')
​
    def _annotate_most_recent_basenote(self, model: Type['BaseNote'], field_name: str) -> 'ClaimQuerySet':
        newest = model.objects.filter(claim=OuterRef('id')).order_by('-created')
        annotate_kwargs = {
            field_name: Subquery(newest.values('text')[:1])
        }
        # noinspection PyTypeChecker
        return self.annotate(**annotate_kwargs)
​
​
class Claim(BaseClaim):
    """"""Concrete :class:`~mkl.fraud_django.models.BaseClaim` for
    :mod:`~mkl.fraud_django.workers_comp` claims.
    """"""
    objects = ClaimQuerySet.as_manager()
    first_rules_engine_run = models.DateField()
​
    @property
    def latest_note(self) -> 'Note':
        """"""Returns the latest :class:`Note`.""""""
        return self.notes.latest()
​
    @property
    def latest_assessment(self) -> 'Assessment':
        """"""Retrieves the latest :class:`Assessment`.""""""
        return self.assessments.latest()
​
    @property
    def latest_rulesengine_run(self) -> 'RulesEngineResult':
        """"""Returns the most most recent run.
​
        .. note::
​
            Use :meth:`ClaimQuerySet.prefetch_latest_results` to
            prefetch the last_run, falls back on querying for the latest value.
​
            Note, if used in a prefetched queryset the value could be stale.
        """"""
        return self._get_latest(RulesEngineResult, '_last_run')
​
    def _get_latest(self, model: Type[models.Model], cache_attr: str):
        """"""Handler to return None if a latest related object does not exist,
        checks the cache first.""""""
        if hasattr(self, cache_attr):
            try:
                return getattr(self, cache_attr)[0]
            except IndexError:
                return None
        try:
            return model.objects.filter(claim=self).latest()
        except model.DoesNotExist:
            return None
​
    def __unicode__(self):
        return self.claim_number
​
​
class BaseNote(models.Model):
    """"""Abstract Base Model for both Notes and Assessments.
​
    Use this base for any claim related editable field whose
    historical data is important.
​
    On the claim we can write functions to retrieve the latest.
​
    .. note:: The related name will be the class name lower case with an 's'.
​
    Attributes:
        text (str): The user provided content
        created (datetime.datetime): Created time stamp
        claim (:class:`Claim`): The claim related to the note.
    """"""
    id = models.AutoField(primary_key=True)
    text = models.TextField(max_length=1000)
    created = models.DateTimeField(auto_now_add=True)
    claim = models.ForeignKey('Claim', on_delete=models.PROTECT, related_name='%(class)ss')
​
    class Meta:
        abstract = True
        get_latest_by = 'created'
        ordering = ('-created',)
​
​
class Note(BaseNote):
    """"""Concrete class for Notes, related_name will become ``notes``.""""""
​
​
class Assessment(BaseNote):
    """"""Concrete class for Assessment, related_name will become ``assessments``.""""""
    CHOICES = (
        ('01', 'Will open a case'),
        ('02', 'Will not open a case'),
        ('03', 'Previously opened'),
        ('04', ""Appears suspicious but won't open""),
        ('05', 'Not enough info to determine'),
        ('06', 'Existing vendor request'),
    )
    text = models.CharField(max_length=1000, choices=CHOICES)
​
    def get_choice_value(self) -> str:
        """"""Returns the value as the choice human readable text.""""""
        db_text = self.text
        return dict(self.CHOICES)[db_text]
​
​
class RuleResult(models.Model):
    """"""The result of running the engine for a particular claim against a :class:`Rule`.
​
    Attributes:
        rule: The rule to be checked
        value: The numeric weight of the result
        result: The rules engine result of all rules run against the claim
    """"""
    id = models.AutoField(primary_key=True)
    rule = models.ForeignKey('Rule', on_delete=models.PROTECT)
    value = models.IntegerField()
    result = models.ForeignKey('RulesEngineResult', on_delete=models.PROTECT, related_name='results')
​
​
class RulesEngineResultQuerySet(models.QuerySet):
    def get_latest_runs(self):
        """"""Filters to only the most recent :class:`RulesEngineResult`\s.""""""
        annotated = self.annotate(
            latest=Max('claim__rules_engine_results__created')
        )
        return annotated.filter(created=F('latest'))
​
​
class RulesEngineResult(models.Model):
    """"""
    RulesEngine run result.
​
    Attributes:
        claim (:class:`Claim`): The claim run through the RulesEngine.
        results (List[:class:`RuleResult`]): Collection of results for each rule.
    """"""
    id = models.AutoField(primary_key=True)
    created = models.DateTimeField(auto_now_add=True)
    claim = models.ForeignKey('Claim', on_delete=models.PROTECT, related_name='rules_engine_results')
    objects = RulesEngineResultQuerySet.as_manager()
​
    class Meta:
        get_latest_by = 'created'
​
    @property
    def score(self) -> int:
        """"""Returns the aggregate score of all related results. Checks prefetched cache first.""""""
        if hasattr(self, '_score'):
            return self._score
        d = self.results.aggregate(score=models.Sum('value'))
        return d['score']
​
​
""""""
Individual Query rendering
""""""
# Recent Note
qs = models.Claim.objects.annotate_most_recent_note()
​
SELECT ""workers_comp_claim"".""id"",
       ""workers_comp_claim"".""claim_number"",
       ""workers_comp_claim"".""first_rules_engine_run"",
       (SELECT U0.""text""
        FROM ""workers_comp_note"" U0
        WHERE U0.""claim_id"" = (""workers_comp_claim"".""id"")
        ORDER BY U0.""created"" DESC
        LIMIT 1) AS ""latest_note_text""
FROM ""workers_comp_claim""
​
# Recent Assessment
qs = models.Claim.objects.annotate_most_recent_assessment()
​
SELECT ""workers_comp_claim"".""id"",
       ""workers_comp_claim"".""claim_number"",
       ""workers_comp_claim"".""first_rules_engine_run"",
       (SELECT U0.""text""
        FROM ""workers_comp_assessment"" U0
        WHERE U0.""claim_id"" = (""workers_comp_claim"".""id"")
        ORDER BY U0.""created"" DESC
        LIMIT 1) AS ""latest_assessment_text""
FROM ""workers_comp_claim""
​
# Latest Results (Run)
qs = models.Claim.objects.annotate_latest_results()
​
SELECT ""workers_comp_claim"".""id"",
       ""workers_comp_claim"".""claim_number"",
       ""workers_comp_claim"".""first_rules_engine_run"",
       SUM(""workers_comp_ruleresult"".""value"") FILTER (WHERE ""workers_comp_rulesengineresult"".""id"" IN (SELECT U0.""id""
                                                                                                      FROM ""workers_comp_rulesengineresult"" U0
                                                                                                             INNER JOIN ""workers_comp_claim"" U1 ON (U0.""claim_id"" = U1.""id"")
                                                                                                             LEFT OUTER JOIN ""workers_comp_rulesengineresult"" U2 ON (U1.""id"" = U2.""claim_id"")
                                                                                                      GROUP BY U0.""id""
                                                                                                      HAVING U0.""created"" = (MAX(U2.""created"")))) AS ""results""
FROM ""workers_comp_claim""
       LEFT OUTER JOIN ""workers_comp_rulesengineresult""
                       ON (""workers_comp_claim"".""id"" = ""workers_comp_rulesengineresult"".""claim_id"")
       LEFT OUTER JOIN ""workers_comp_ruleresult""
                       ON (""workers_comp_rulesengineresult"".""id"" = ""workers_comp_ruleresult"".""result_id"")
GROUP BY ""workers_comp_claim"".""id""
​
​
""""""
When chained the query renders incorrectly like this
""""""
qs = models.Claim.objects.annotate_latest_results().annotate_most_recent_note().annotate_most_recent_assessment()
​
SELECT ""workers_comp_claim"".""id"",
       ""workers_comp_claim"".""claim_number"",
       ""workers_comp_claim"".""first_rules_engine_run"",
       SUM(""workers_comp_ruleresult"".""value"") FILTER (WHERE ""workers_comp_rulesengineresult"".""id"" IN (SELECT U0.""id""
                                                                                                      FROM ""workers_comp_rulesengineresult"" U0
                                                                                                             INNER JOIN ""workers_comp_claim"" U1 ON (U0.""claim_id"" = U1.""id"")
                                                                                                             LEFT OUTER JOIN ""workers_comp_rulesengineresult"" U2 ON (U1.""id"" = U2.""claim_id"")
                                                                                                      GROUP BY U0.""id""
                                                                                                      HAVING U0.""created"" = (MAX(U2.""created"")))) AS ""results"",
       (SELECT U0.""text""
        FROM ""workers_comp_note"" U0
        WHERE U0.""claim_id"" = (""workers_comp_claim"".""id"")
        ORDER BY U0.""created"" DESC
        LIMIT 1)                                                                                                                                  AS ""latest_note_text"",
       (SELECT U0.""text""
        FROM ""workers_comp_assessment"" U0
        WHERE U0.""claim_id"" = (""workers_comp_claim"".""id"")
        ORDER BY U0.""created"" DESC
        LIMIT 1)                                                                                                                                  AS ""latest_assessment_text""
FROM ""workers_comp_claim""
       LEFT OUTER JOIN ""workers_comp_rulesengineresult""
                       ON (""workers_comp_claim"".""id"" = ""workers_comp_rulesengineresult"".""claim_id"")
       LEFT OUTER JOIN ""workers_comp_ruleresult""
                       ON (""workers_comp_rulesengineresult"".""id"" = ""workers_comp_ruleresult"".""result_id"")
GROUP BY ""workers_comp_claim"".""id"", (SELECT U0.""text""
                                     FROM ""workers_comp_note"" U0
                                     WHERE U0.""claim_id"" = (""workers_comp_claim"".""id"")
                                     ORDER BY U0.""created"" DESC
                                     LIMIT 1), (SELECT U0.""text""
                                                FROM ""workers_comp_assessment"" U0
                                                WHERE U0.""claim_id"" = (""workers_comp_claim"".""id"")
                                                ORDER BY U0.""created"" DESC
                                                LIMIT 1)
​
​
""""""
Why is Django performing the group by with the Subqueries? How do I make it render correctly like this:
""""""
SELECT ""workers_comp_claim"".""id"",
       ""workers_comp_claim"".""claim_number"",
       ""workers_comp_claim"".""first_rules_engine_run"",
       SUM(""workers_comp_ruleresult"".""value"") FILTER (WHERE ""workers_comp_rulesengineresult"".""id"" IN (SELECT U0.""id""
                                                                                                      FROM ""workers_comp_rulesengineresult"" U0
                                                                                                             INNER JOIN ""workers_comp_claim"" U1 ON (U0.""claim_id"" = U1.""id"")
                                                                                                             LEFT OUTER JOIN ""workers_comp_rulesengineresult"" U2 ON (U1.""id"" = U2.""claim_id"")
                                                                                                      GROUP BY U0.""id""
                                                                                                      HAVING U0.""created"" = (MAX(U2.""created"")))) AS ""results"",
       (SELECT U0.""text""
        FROM ""workers_comp_note"" U0
        WHERE U0.""claim_id"" = (""workers_comp_claim"".""id"")
        ORDER BY U0.""created"" DESC
        LIMIT 1)                                                                                                                                  AS ""latest_note_text"",
       (SELECT U0.""text""
        FROM ""workers_comp_assessment"" U0
        WHERE U0.""claim_id"" = (""workers_comp_claim"".""id"")
        ORDER BY U0.""created"" DESC
        LIMIT 1)                                                                                                                                  AS ""latest_assessment_text""
FROM ""workers_comp_claim""
       LEFT OUTER JOIN ""workers_comp_rulesengineresult""
                       ON (""workers_comp_claim"".""id"" = ""workers_comp_rulesengineresult"".""claim_id"")
       LEFT OUTER JOIN ""workers_comp_ruleresult""
                       ON (""workers_comp_rulesengineresult"".""id"" = ""workers_comp_ruleresult"".""result_id"")
GROUP BY ""workers_comp_claim"".""id"";
}}}
"	Cleanup/optimization	closed	Database layer (models, ORM)	dev	Normal	fixed	subquery, group_by	Simon Charette	Accepted	1	0	0	0	0	0
