Opened 14 years ago
Closed 14 years ago
#16165 closed Bug (worksforme)
Annotations get crazy when .values has many arguments (MySQL)
| Reported by: | Owned by: | nobody | |
|---|---|---|---|
| Component: | Database layer (models, ORM) | Version: | dev |
| Severity: | Normal | Keywords: | aggregate, annotate |
| Cc: | Triage Stage: | Unreviewed | |
| Has patch: | no | Needs documentation: | no |
| Needs tests: | no | Patch needs improvement: | no |
| Easy pickings: | no | UI/UX: | no |
Description (last modified by )
Models in "mars" project:
class BiddingRule(models.Model): name = models.CharField(max_length=80) ... class TagList(models.Model): name = models.CharField(max_length=50) ... def __unicode__(self): return self.name class Property(models.Model): property = models.CharField(max_length=100) def __unicode__(self): return self.property class Meta: ordering = ['property'] verbose_name_plural = 'properties' class Account(models.Model): """A property has 1..n accounts""" username = models.CharField(max_length=200) password = models.CharField(max_length=200) ENGINE_CHOICES = ( (1, 'Google'), (2, 'Yahoo'), (3, 'Bing'), ) property = models.ForeignKey(Property) engine = models.IntegerField(choices=ENGINE_CHOICES) class Campaign(models.Model): """An account has 1..n campaigns""" campaign = models.CharField(max_length=500) engine_id = models.BigIntegerField() account = models.ForeignKey(Account) STATUS_CHOICES = ( (1, 'Active'), (2, 'Paused'), (3, 'Deleted'), ) status = models.IntegerField(choices=STATUS_CHOICES, default=1) budget = models.FloatField(default=0.0) tags = models.ManyToManyField('TagList') visible_tags = models.CharField(max_length=200, blank=True, null=True) bidding_rule = models.ForeignKey(BiddingRule, null=True, blank=True) class Meta: ordering = ['campaign'] def __unicode__(self): return self.campaign class AdGroup(models.Model): """A Campaign has 1..n AdGroups""" ad_group = models.CharField(max_length=500) engine_id = models.BigIntegerField() campaign = models.ForeignKey(Campaign) STATUS_CHOICES = ( (1, 'Active'), (2, 'Paused'), (3, 'Deleted'), (4, 'Draft'), ) status = models.IntegerField(choices=STATUS_CHOICES, default=1) max_cpc = models.DecimalField(max_digits=7, decimal_places=4, default=Decimal('0')) content_max_cpc = models.DecimalField(max_digits=7, decimal_places=4, default=Decimal('0')) tags = models.ManyToManyField('TagList') visible_tags = models.CharField(max_length=200, blank=True, null=True) bidding_rule = models.ForeignKey(BiddingRule, null=True, blank=True) class Meta: ordering = ['ad_group'] class CampaignStat(models.Model): """Stats by campaign""" date = models.DateField() account = models.ForeignKey(Account) campaign = models.ForeignKey(Campaign) impressions = models.IntegerField(default=0) clicks = models.IntegerField(default=0) cost = models.DecimalField(max_digits=13, decimal_places=4, default=Decimal('0')) conversions = models.IntegerField(default=0) revenue = models.DecimalField(max_digits=13, decimal_places=4, default=Decimal('0')) average_position = models.FloatField(default=0.0) class AdGroupStat(models.Model): """Stats by ad group""" date = models.DateField() account = models.ForeignKey(Account) ad_group = models.ForeignKey(AdGroup) impressions = models.IntegerField(default=0) clicks = models.IntegerField(default=0) cost = models.DecimalField(max_digits=13, decimal_places=4, default=Decimal('0')) conversions = models.IntegerField(default=0) revenue = models.DecimalField(max_digits=13, decimal_places=4, default=Decimal('0')) average_position = models.FloatField(default=0.0)
And here is the crazy annotation issue I am running into. Note the GROUP BY section of each query.
>>> from mars.models import *
>>> from django.db.models import Sum
>>> annotations = {
'_impressions': Sum('impressions'),
'_clicks': Sum('clicks'),
'_cost': Sum('cost'),
'_average_position': Avg('average_position'),
'_conversions': Sum('conversions'),
'_revenue': Sum('revenue')
}
>>> ad_group_values = ['ad_group__id',
'ad_group__engine_id',
'ad_group__ad_group',
'ad_group__campaign__account__engine',
'ad_group__campaign__campaign',
'ad_group__max_cpc',
'ad_group__content_max_cpc',
'ad_group__status',
'ad_group__bidding_rule__name',
'ad_group__visible_tags']
>>> stats = AdGroupStat.objects.values(*ad_group_values)
>>> stats = stats.annotate(**annotations)
>>> print(stats.query)
SELECT
`mars_adgroupstat`.`ad_group_id`,
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_account`.`engine`,
`mars_campaign`.`campaign`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_biddingrule`.`name`,
`mars_adgroup`.`visible_tags`,
AVG(`mars_adgroupstat`.`average_position`) AS `_average_position`,
SUM(`mars_adgroupstat`.`conversions`) AS `_conversions`,
SUM(`mars_adgroupstat`.`clicks`) AS `_clicks`,
SUM(`mars_adgroupstat`.`cost`) AS `_cost`,
SUM(`mars_adgroupstat`.`revenue`) AS `_revenue`,
SUM(`mars_adgroupstat`.`impressions`) AS `_impressions`
FROM
`mars_adgroupstat`
INNER JOIN
`mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`)
INNER JOIN
`mars_campaign` ON (`mars_adgroup`.`campaign_id` = `mars_campaign`.`id`)
INNER JOIN
`mars_account` ON (`mars_campaign`.`account_id` = `mars_account`.`id`)
LEFT OUTER JOIN
`mars_biddingrule` ON (`mars_adgroup`.`bidding_rule_id` = `mars_biddingrule`.`id`)
GROUP BY
`mars_adgroupstat`.`id`,
`mars_adgroupstat`.`ad_group_id`,
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_account`.`engine`,
`mars_campaign`.`campaign`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_biddingrule`.`name`,
`mars_adgroup`.`visible_tags`
ORDER BY NULL
>>> stats = AdGroupStat.objects.values(*ad_group_values[:-1])
>>> stats = stats.annotate(**annotations)
>>> print(stats.query)
SELECT
`mars_adgroupstat`.`ad_group_id`,
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_account`.`engine`,
`mars_campaign`.`campaign`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_biddingrule`.`name`,
AVG(`mars_adgroupstat`.`average_position`) AS `_average_position`,
SUM(`mars_adgroupstat`.`conversions`) AS `_conversions`,
SUM(`mars_adgroupstat`.`clicks`) AS `_clicks`,
SUM(`mars_adgroupstat`.`cost`) AS `_cost`,
SUM(`mars_adgroupstat`.`revenue`) AS `_revenue`,
SUM(`mars_adgroupstat`.`impressions`) AS `_impressions`
FROM
`mars_adgroupstat`
INNER JOIN
`mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`)
INNER JOIN
`mars_campaign` ON (`mars_adgroup`.`campaign_id` = `mars_campaign`.`id`)
INNER JOIN
`mars_account` ON (`mars_campaign`.`account_id` = `mars_account`.`id`)
LEFT OUTER JOIN
`mars_biddingrule` ON (`mars_adgroup`.`bidding_rule_id` = `mars_biddingrule`.`id`)
GROUP BY
`mars_adgroupstat`.`ad_group_id`,
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_account`.`engine`,
`mars_campaign`.`campaign`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_biddingrule`.`name`,
`mars_adgroupstat`.`ad_group_id`,
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_account`.`engine`,
`mars_campaign`.`campaign`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_biddingrule`.`name`
ORDER BY NULL
>>> stats = AdGroupStat.objects.values(*ad_group_values[1:])
>>> stats = stats.annotate(**annotations)
>>> print(stats.query)
SELECT
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_account`.`engine`,
`mars_campaign`.`campaign`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_biddingrule`.`name`,
`mars_adgroup`.`visible_tags`,
AVG(`mars_adgroupstat`.`average_position`) AS `_average_position`,
SUM(`mars_adgroupstat`.`conversions`) AS `_conversions`,
SUM(`mars_adgroupstat`.`clicks`) AS `_clicks`,
SUM(`mars_adgroupstat`.`cost`) AS `_cost`,
SUM(`mars_adgroupstat`.`revenue`) AS `_revenue`,
SUM(`mars_adgroupstat`.`impressions`) AS `_impressions`
FROM
`mars_adgroupstat`
INNER JOIN
`mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`)
INNER JOIN
`mars_campaign` ON (`mars_adgroup`.`campaign_id` = `mars_campaign`.`id`)
INNER JOIN
`mars_account` ON (`mars_campaign`.`account_id` = `mars_account`.`id`)
LEFT OUTER JOIN
`mars_biddingrule` ON (`mars_adgroup`.`bidding_rule_id` = `mars_biddingrule`.`id`)
GROUP BY
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_account`.`engine`,
`mars_campaign`.`campaign`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_biddingrule`.`name`,
`mars_adgroup`.`visible_tags`,
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_account`.`engine`,
`mars_campaign`.`campaign`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_biddingrule`.`name`,
`mars_adgroup`.`visible_tags`
ORDER BY NULL
>>>
In the first query, using all 10 arguments in .values, the GROUP BY has 11 items as it seems to be adding the local model id:
GROUP BY
`mars_adgroupstat`.`id`,
`mars_adgroupstat`.`ad_group_id`,
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_account`.`engine`,
`mars_campaign`.`campaign`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_biddingrule`.`name`,
`mars_adgroup`.`visible_tags`
In the second or third queries, using 9 arguments in .values, the GROUP BY has 18 items as it seems to be doubling the GROUP BY args:
GROUP BY
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_account`.`engine`,
`mars_campaign`.`campaign`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_biddingrule`.`name`,
`mars_adgroup`.`visible_tags`,
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_account`.`engine`,
`mars_campaign`.`campaign`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_biddingrule`.`name`,
`mars_adgroup`.`visible_tags`
GROUP BY
`mars_adgroupstat`.`ad_group_id`,
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_account`.`engine`,
`mars_campaign`.`campaign`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_biddingrule`.`name`,
`mars_adgroupstat`.`ad_group_id`,
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_account`.`engine`,
`mars_campaign`.`campaign`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_biddingrule`.`name`
The first query does not return what I want, while the second and third return correctly, but look pretty funky. Using MySQL 5.1.41-3ubuntu12.8, Django SVN 16332, Python 2.6.5 on Ubuntu 10.04.
Change History (4)
comment:1 by , 14 years ago
| Description: | modified (diff) |
|---|
comment:2 by , 14 years ago
It would be great if the case can be reduced by the OP to a simpler one where the reported issue is still present. That way we could be able to diagnose if this is e.g. a duplicate of #14357.
comment:3 by , 14 years ago
Simplified:
Models:
class Account(models.Model): username = models.CharField(max_length=200) password = models.CharField(max_length=200) engine = models.IntegerField(choices=ENGINE_CHOICES) class AdGroup(models.Model): ad_group = models.CharField(max_length=500) engine_id = models.BigIntegerField() status = models.IntegerField(choices=STATUS_CHOICES, default=1) max_cpc = models.DecimalField(max_digits=7, decimal_places=4, default=Decimal('0')) content_max_cpc = models.DecimalField(max_digits=7, decimal_places=4, default=Decimal('0')) class Meta: ordering = ['ad_group'] class AdGroupStat(models.Model): date = models.DateField() account = models.ForeignKey(Account) ad_group = models.ForeignKey(AdGroup) impressions = models.IntegerField(default=0)
And here is the crazy annotation issue I am running into:
>>> from mars.models import *
>>> from django.db.models import Sum
>>> ad_group_values = [
'account__engine',
'account__username',
'account__password',
'ad_group__id',
'ad_group__engine_id',
'ad_group__ad_group',
'ad_group__max_cpc',
'ad_group__content_max_cpc',
'ad_group__status'
]
>>> s = AdGroupStat.objects.values(*ad_group_values)
>>> s = s.annotate(Sum('impressions'))
>>> print(s.query)
SELECT
`mars_account`.`engine`,
`mars_account`.`username`,
`mars_account`.`password`,
`mars_adgroupstat`.`ad_group_id`,
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
SUM(`mars_adgroupstat`.`impressions`) AS `impressions__sum`
FROM
`mars_adgroupstat`
INNER JOIN
`mars_account` ON (`mars_adgroupstat`.`account_id` = `mars_account`.`id`)
INNER JOIN
`mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`)
GROUP BY
`mars_account`.`engine`,
`mars_account`.`username`,
`mars_account`.`password`,
`mars_adgroupstat`.`ad_group_id`,
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`
ORDER BY NULL
>>> ad_group_values.append('account__id')
>>> s = AdGroupStat.objects.values(*ad_group_values)
>>> s = s.annotate(Sum('impressions'))
>>> print(s.query)
SELECT
`mars_account`.`engine`,
`mars_account`.`username`,
`mars_account`.`password`,
`mars_adgroupstat`.`ad_group_id`,
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_adgroupstat`.`account_id`,
SUM(`mars_adgroupstat`.`impressions`) AS `impressions__sum`
FROM
`mars_adgroupstat`
INNER JOIN
`mars_account` ON (`mars_adgroupstat`.`account_id` = `mars_account`.`id`)
INNER JOIN
`mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`)
GROUP BY
`mars_adgroupstat`.`id`,
`mars_account`.`engine`,
`mars_account`.`username`,
`mars_account`.`password`,
`mars_adgroupstat`.`ad_group_id`,
`mars_adgroup`.`engine_id`,
`mars_adgroup`.`ad_group`,
`mars_adgroup`.`max_cpc`,
`mars_adgroup`.`content_max_cpc`,
`mars_adgroup`.`status`,
`mars_adgroupstat`.`account_id`
ORDER BY NULL
Note the extra mars_adgroupstat.id in the GROUP BY of the second query.
comment:4 by , 14 years ago
| Resolution: | → worksforme |
|---|---|
| Status: | new → closed |
| UI/UX: | unset |
I can't reproduce this problem given the code you've provided and using the Subversion r16341 with MySQL.
Wrapped SQL to allow for easier reading.