﻿id	summary	reporter	owner	description	type	status	component	version	severity	resolution	keywords	cc	stage	has_patch	needs_docs	needs_tests	needs_better_patch	easy	ui_ux
34255	Annotation/group by with an expression on psycopg3	Guillaume Andreu Sabater	Mariusz Felisiak	"Given the following code:

{{{
import zoneinfo

from django.db import models
from django.db.models.functions import ExtractYear

from django.contrib.postgres.fields import ArrayField

TZ = zoneinfo.ZoneInfo(""Europe/Paris"")


class JsonBuildObject(models.Func):
    function = ""jsonb_build_object""
    output_field = models.JSONField()


class SubqueryArray(models.Subquery):
    template = ""ARRAY(%(subquery)s)""
    output_field = ArrayField(base_field=models.JSONField())


class CurveQuerySet(models.QuerySet[""Curve""]):
    """"""Curve QuerySet.""""""

    def annotate_loads(self) -> ""CurveQuerySet"":
        """"""Annotate baseload by year.""""""
        baseload_qs = (
            Point.objects.filter(curve=models.OuterRef(""pk""))
            .annotate(year=ExtractYear(""start_at"", tzinfo=TZ))
            .values(""year"")
            .alias(baseload=models.Avg(""value""))
            .annotate(
                json=JsonBuildObject(
                    models.Value(""year""),
                    models.F(""year""),
                    models.Value(""baseload""),
                    models.F(""baseload""),
                )
            )
            .values(""json"")
        )

        return self.annotate(_baseloads=SubqueryArray(baseload_qs))


CurveManager = models.Manager.from_queryset(CurveQuerySet)


class Curve(models.Model):
    """"""Curve.""""""

    objects = CurveManager()


class Point(models.Model):
    """"""Curve point.""""""

    curve = models.ForeignKey(
        Curve,
        on_delete=models.CASCADE,
        related_name=""points"",
        related_query_name=""point"",
    )
    start_at = models.DateTimeField()
    value = models.FloatField()

}}}

I use the ''annotate_loads'' to compute yearly averages (with .values(""year"") acting as a GROUP BY) and dump the results in a json field.

With psycopg3, from what I've seen, the query params/values are not interpolated in the query anymore, but sent alongside the query to the server.

In my case, it looks like this:

{{{
SELECT
    ""fail_curve"".""id"",
    ARRAY(
        SELECT
            jsonb_build_object(
                $1,
                EXTRACT(
                    YEAR
                    FROM
                        U0.""start_at"" AT TIME ZONE $2
                ),
                $3,
                AVG(U0.""value"")
            ) AS ""json""
        FROM
            ""fail_point"" U0
        WHERE
            U0.""curve_id"" = (""fail_curve"".""id"")
        GROUP BY
            EXTRACT(
                YEAR
                FROM
                    U0.""start_at"" AT TIME ZONE $4
            )
    ) AS ""_baseloads""
FROM
    ""fail_curve""
WHERE
    ""fail_curve"".""id"" = $5
}}}

But postgres doesn't like: django.db.utils.ProgrammingError: column ""u0.start_at"" must appear in the GROUP BY clause or be used in an aggregate function 

because

{{{
EXTRACT(
    YEAR
    FROM
        U0.""start_at"" AT TIME ZONE $2
)
}}}
is different from
{{{
EXTRACT(
    YEAR
    FROM
        U0.""start_at"" AT TIME ZONE $4
)
}}}

I tested an updated query using the same placeholder ($4 -> $2) and it worked as expected:

{{{
PREPARE working (text, text, text, int) AS
    SELECT
        ""fail_curve"".""id"",
        ARRAY(
            SELECT
                jsonb_build_object(
                    $1,
                    EXTRACT(
                        YEAR
                        FROM
                            U0.""start_at"" AT TIME ZONE $2
                    ),
                    $3,
                    AVG(U0.""value"")
                ) AS ""json""
            FROM
                ""fail_point"" U0
            WHERE
                U0.""curve_id"" = (""fail_curve"".""id"")
            GROUP BY
                EXTRACT(
                    YEAR
                    FROM
                        U0.""start_at"" AT TIME ZONE $2
                )
        ) AS ""_baseloads""
    FROM
        ""fail_curve""
    WHERE
        ""fail_curve"".""id"" = $4
    LIMIT
        1;
EXECUTE working('year', 'Europe/Paris', 'base', 1);
}}}

My understanding is as follow:
* group by is an expression
* this expression is also used in select
* they have a different placeholder in the query generated by django/psycopg3
* postgres rejects it

Let me know if this needs extra details."	Bug	closed	Database layer (models, ORM)	dev	Release blocker	fixed	orm postgres psycopg3 annotation groupby	Florian Apolloner Simon Charette	Accepted	1	0	0	0	0	0
