﻿id	summary	reporter	owner	description	type	status	component	version	severity	resolution	keywords	cc	stage	has_patch	needs_docs	needs_tests	needs_better_patch	easy	ui_ux
34993	Unexpected behaviour when filtering across mult-value (aka tomany) joins.	Gordon Wrigley	nobody	"My overall questions here are:
1: does the below all look correct / expected
2: how should I be writing this queryset
3: what are the general guidelines I should follow when doing this kind of thing

I have some models the important bit's of which look like this
{{{
    DataExport
        pass
    DataImport
        from_data_export = ForeignKey(
            DataExport, 
            related_name=""to_data_imports"", 
            null=True,
        )
    Run
        data_import = ForeignKey(
            DataImport, 
            related_name=""runs"", 
            null=False,
        )
        wet = BooleanField(null=False)
        succeeded = BooleanField(null=True)
}}}
The basic idea here is stuff gets exported out of one part of the system.
And then will be imported into another part of the system.
And those imports happen in runs that can be dry/wet and success/fail.

I'm trying to write a query to find exports that do not have successful wet import runs and the SQL it's producing is surprising in a variety of ways.

Below I'm going to show a series of querysets, with the associated SQL and some comments.

This is all on Django 4.1.13, I couldn't find anything in release notes that would suggest any of this has changed since then.

--------------------
{{{ #!python
DataExport.objects.exclude(
    to_data_imports__runs__wet=True, 
    to_data_imports__runs__succeeded=True,
)
}}}

{{{ #!sql
SELECT <snip> 
FROM ""approval_dataexport"" 
WHERE NOT (
    EXISTS(
        SELECT 1 AS ""a"" 
        FROM ""retain_dataimport"" U1 
        INNER JOIN ""data_import_run"" U2 ON (U1.""id"" = U2.""data_import_id"") 
        WHERE (
            U2.""succeeded"" 
            AND U1.""from_data_export_id"" = (""approval_dataexport"".""id"")
        ) 
        LIMIT 1
    ) 
    AND EXISTS(
        SELECT 1 AS ""a"" 
        FROM ""retain_dataimport"" U1 
        INNER JOIN ""data_import_run"" U2 ON (U1.""id"" = U2.""data_import_id"") 
        WHERE (
            U2.""wet"" 
            AND U1.""from_data_export_id"" = (""approval_dataexport"".""id"")
        ) 
        LIMIT 1
    )
)
}}}

This is the most concise expression of intent. And so it's not working as desired is a trap for beginners. But it is consistent with where the docs say ""conditions in a single exclude() call will not necessarily refer to the same item"".

--------------------
{{{ #!python
DataExport.objects.exclude(
    Q(
        to_data_imports__runs__wet=True, 
        to_data_imports__runs__succeeded=True,
    )
)
}}}

{{{ #!sql
SELECT <snip> 
FROM ""approval_dataexport"" 
WHERE NOT (
    EXISTS(
        SELECT 1 AS ""a"" 
        FROM ""retain_dataimport"" U1 
        INNER JOIN ""data_import_run"" U2 ON (U1.""id"" = U2.""data_import_id"") 
        WHERE (
            U2.""succeeded"" 
            AND U1.""from_data_export_id"" = (""approval_dataexport"".""id"")
        ) 
        LIMIT 1
    ) 
    AND EXISTS(
        SELECT 1 AS ""a"" 
        FROM ""retain_dataimport"" U1 
        INNER JOIN ""data_import_run"" U2 ON (U1.""id"" = U2.""data_import_id"") 
        WHERE (
            U2.""wet"" AND U1.""from_data_export_id"" = (""approval_dataexport"".""id"")
        ) 
        LIMIT 1
    )
)
}}}

AFAIK the docs don't say anything either way on what this should do.


--------------------
{{{ #!python
DataExport.objects.filter(
    ~Q(
        to_data_imports__runs__wet=True, 
        to_data_imports__runs__succeeded=True,
    )
)
}}}

{{{ #!sql
SELECT <snip> 
FROM ""approval_dataexport"" 
WHERE NOT (
    EXISTS(
        SELECT 1 AS ""a"" 
        FROM ""retain_dataimport"" U1 
        INNER JOIN ""data_import_run"" U2 ON (U1.""id"" = U2.""data_import_id"") 
        WHERE (
            U2.""succeeded"" 
            AND U1.""from_data_export_id"" = (""approval_dataexport"".""id"")
        ) 
        LIMIT 1
    ) 
    AND EXISTS(
        SELECT 1 AS ""a"" 
        FROM ""retain_dataimport"" U1 
        INNER JOIN ""data_import_run"" U2 ON (U1.""id"" = U2.""data_import_id"") 
        WHERE (
            U2.""wet"" 
            AND U1.""from_data_export_id"" = (""approval_dataexport"".""id"")
        ) 
        LIMIT 1
    )
)
}}}

I am surprised this didn't have the desired result.

--------------------
{{{ #!python
DataExport.objects.exclude(
    to_data_imports__in=DataImport.objects.filter(
        runs__wet=True, 
        runs__succeeded=True,
    )
)
}}}

{{{ #!sql
SELECT <snip> 
FROM ""approval_dataexport""
WHERE NOT (
    EXISTS(
        SELECT 1 AS ""a"" 
        FROM ""retain_dataimport"" V1 
        WHERE (
            V1.""id"" IN (
                SELECT U0.""id"" 
                FROM ""retain_dataimport"" U0 
                INNER JOIN ""data_import_run"" U1 
                ON (U0.""id"" = U1.""data_import_id"") 
                WHERE (U1.""succeeded"" AND U1.""wet"")
            ) 
            AND V1.""from_data_export_id"" = (""approval_dataexport"".""id"")
        ) 
        LIMIT 1
    )
)
}}}

This one does what I want but the nested query structure seems excessive, particularly the dual `FROM ""retain_dataimport""`, I'm not sure if the planner will do a good job of that.

--------------------
{{{ #!python
DataExport.objects.exclude(
    to_data_imports__runs__in=Run.objects.filter(
        wet=True, 
        succeeded=True,
    )
)
}}}

{{{ #!sql
SELECT <snip> 
FROM ""approval_dataexport"" 
WHERE NOT (
    EXISTS(
        SELECT 1 AS ""a"" 
        FROM ""retain_dataimport"" V1 
        INNER JOIN ""data_import_run"" V2 ON (V1.""id"" = V2.""data_import_id"") 
        WHERE (
            V2.""id"" IN (
                SELECT U0.""id"" 
                FROM ""data_import_run"" U0 
                WHERE (U0.""succeeded"" AND U0.""wet"")
            ) 
            AND V1.""from_data_export_id"" = (""approval_dataexport"".""id"")
        ) 
        LIMIT 1
    )
)
}}}

Basically the same as the above except now the dual selects are on data_import_run.

--------------------
{{{ #!python
DataExport.objects.exclude(
    Exists(
        Run.objects.filter(
            data_import__from_data_export__id=OuterRef('id'), 
            wet=True, 
            succeeded=True,
        )
    )
)
}}}

{{{ #!sql
SELECT <snip> 
FROM ""approval_dataexport"" 
WHERE NOT (
    EXISTS(
        SELECT 1 AS ""a"" 
        FROM ""data_import_run"" U0 
        INNER JOIN ""retain_dataimport"" U1 ON (U0.""data_import_id"" = U1.""id"") 
        WHERE (
            U1.""from_data_export_id"" = (""approval_dataexport"".""id"") 
            AND U0.""succeeded"" 
            AND U0.""wet""
        ) 
        LIMIT 1
    )
)
}}}

This one is starting to look decent
"	Uncategorized	closed	Database layer (models, ORM)	4.1	Normal	duplicate		Julius Seporaitis	Unreviewed	0	0	0	0	0	0
