Django

Code

Ticket #1219: bulk-delete-v2.patch

File bulk-delete-v2.patch, 20.0 kB (added by russellm, 3 years ago)

Patch implementing bulk delete in post-descriptor magic-removal

  • django/db/models/base.py

    old new  
    33from django.db.models.fields import AutoField, ImageField 
    44from django.db.models.fields.related import OneToOne, ManyToOne 
    55from django.db.models.related import RelatedObject 
    6 from django.db.models.query import orderlist2sql 
     6from django.db.models.query import orderlist2sql, delete_objects 
    77from django.db.models.options import Options, AdminOptions 
    88from django.db import connection, backend 
    99from django.db.models import signals 
     
    4949        register_models(new_class._meta.app_label, new_class) 
    5050        return new_class 
    5151 
    52 def cmp_cls(x, y): 
    53     for field in x._meta.fields: 
    54         if field.rel and not field.null and field.rel.to == y: 
    55             return -1 
    56     for field in y._meta.fields: 
    57         if field.rel and not field.null and field.rel.to == x: 
    58             return 1 
    59     return 0 
    60  
    6152class Model(object): 
    6253    __metaclass__ = ModelBase 
    6354 
     
    187178 
    188179    save.alters_data = True 
    189180 
    190     def __collect_sub_objects(self, seen_objs): 
     181    def _collect_sub_objects(self, seen_objs): 
    191182        """ 
    192183        Recursively populates seen_objs with all objects related to this object. 
    193184        When done, seen_objs will be in the format: 
     
    207198                except ObjectDoesNotExist: 
    208199                    pass 
    209200                else: 
    210                     sub_obj.__collect_sub_objects(seen_objs) 
     201                    sub_obj._collect_sub_objects(seen_objs) 
    211202            else: 
    212203                for sub_obj in getattr(self, rel_opts_name).all(): 
    213                     sub_obj.__collect_sub_objects(seen_objs) 
     204                    sub_obj._collect_sub_objects(seen_objs) 
    214205 
    215206    def delete(self): 
    216207        assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname) 
     208         
     209        # Find all the objects than need to be deleted 
    217210        seen_objs = {} 
    218         self.__collect_sub_objects(seen_objs) 
    219  
    220         seen_classes = set(seen_objs.keys()) 
    221         ordered_classes = list(seen_classes) 
    222         ordered_classes.sort(cmp_cls) 
    223  
    224         cursor = connection.cursor() 
    225  
    226         for cls in ordered_classes: 
    227             seen_objs[cls] = seen_objs[cls].items() 
    228             seen_objs[cls].sort() 
    229             for pk_val, instance in seen_objs[cls]: 
    230                 dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance) 
    231  
    232                 for related in cls._meta.get_all_related_many_to_many_objects(): 
    233                     cursor.execute("DELETE FROM %s WHERE %s=%%s" % \ 
    234                         (backend.quote_name(related.field.get_m2m_db_table(related.opts)), 
    235                         backend.quote_name(cls._meta.object_name.lower() + '_id')), 
    236                         [pk_val]) 
    237                 for f in cls._meta.many_to_many: 
    238                     cursor.execute("DELETE FROM %s WHERE %s=%%s" % \ 
    239                         (backend.quote_name(f.get_m2m_db_table(cls._meta)), 
    240                         backend.quote_name(cls._meta.object_name.lower() + '_id')), 
    241                         [pk_val]) 
    242                 for field in cls._meta.fields: 
    243                     if field.rel and field.null and field.rel.to in seen_classes: 
    244                         cursor.execute("UPDATE %s SET %s=NULL WHERE %s=%%s" % \ 
    245                             (backend.quote_name(cls._meta.db_table), backend.quote_name(field.column), 
    246                             backend.quote_name(cls._meta.pk.column)), [pk_val]) 
    247                         setattr(instance, field.attname, None) 
    248  
    249         for cls in ordered_classes: 
    250             seen_objs[cls].reverse() 
    251             for pk_val, instance in seen_objs[cls]: 
    252                 cursor.execute("DELETE FROM %s WHERE %s=%%s" % \ 
    253                     (backend.quote_name(cls._meta.db_table), backend.quote_name(cls._meta.pk.column)), 
    254                     [pk_val]) 
    255                 setattr(instance, cls._meta.pk.attname, None) 
    256                 dispatcher.send(signal=signals.post_delete, sender=cls, instance=instance) 
    257  
    258         connection.commit() 
    259  
     211        self._collect_sub_objects(seen_objs) 
     212         
     213        # Actually delete the objects 
     214        delete_objects(seen_objs) 
     215         
    260216    delete.alters_data = True 
    261217 
    262218    def _get_FIELD_display(self, field): 
  • django/db/models/manager.py

    old new  
    5757    def dates(self, *args, **kwargs): 
    5858        return self.get_query_set().dates(*args, **kwargs) 
    5959 
    60     def delete(self, *args, **kwargs): 
    61         return self.get_query_set().delete(*args, **kwargs) 
    62  
    6360    def distinct(self, *args, **kwargs): 
    6461        return self.get_query_set().distinct(*args, **kwargs) 
    6562 
  • django/db/models/query.py

    old new  
    11from django.db import backend, connection 
    22from django.db.models.fields import DateField, FieldDoesNotExist 
     3from django.db.models import signals 
     4from django.dispatch import dispatcher 
    35from django.utils.datastructures import SortedDict 
     6 
    47import operator 
    58 
    69LOOKUP_SEPARATOR = '__' 
     
    125128        extra_select = self._select.items() 
    126129 
    127130        cursor = connection.cursor() 
    128         select, sql, params = self._get_sql_clause(True) 
     131        select, sql, params = self._get_sql_clause() 
    129132        cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params) 
    130133        fill_cache = self._select_related 
    131134        index_end = len(self.model._meta.fields) 
     
    149152        counter._offset = None 
    150153        counter._limit = None 
    151154        counter._select_related = False 
    152         select, sql, params = counter._get_sql_clause(True) 
     155        select, sql, params = counter._get_sql_clause() 
    153156        cursor = connection.cursor() 
    154157        cursor.execute("SELECT COUNT(*)" + sql, params) 
    155158        return cursor.fetchone()[0] 
     
    171174        assert bool(latest_by), "latest() requires either a field_name parameter or 'get_latest_by' in the model" 
    172175        return self._clone(_limit=1, _order_by=('-'+latest_by,)).get() 
    173176 
    174     def delete(self, *args, **kwargs): 
     177    def delete(self): 
    175178        """ 
    176         Deletes the records with the given kwargs. If no kwargs are given, 
    177         deletes records in the current QuerySet. 
     179        Deletes the records in the current QuerySet. 
    178180        """ 
    179         # Remove the DELETE_ALL argument, if it exists. 
    180         delete_all = kwargs.pop('DELETE_ALL', False) 
     181        del_query = self._clone()         
    181182 
    182         # Check for at least one query argument. 
    183         if not kwargs and not delete_all: 
    184             raise TypeError, "SAFETY MECHANISM: Specify DELETE_ALL=True if you actually want to delete all data." 
    185  
    186         if kwargs: 
    187             del_query = self.filter(*args, **kwargs) 
    188         else: 
    189             del_query = self._clone() 
    190183        # disable non-supported fields 
    191184        del_query._select_related = False 
    192         del_query._select = {} 
    193185        del_query._order_by = [] 
    194186        del_query._offset = None 
    195187        del_query._limit = None 
    196188 
    197         # Perform the SQL delete 
    198         cursor = connection.cursor() 
    199         _, sql, params = del_query._get_sql_clause(False) 
    200         cursor.execute("DELETE " + sql, params) 
    201  
     189        # Collect all the objects to be deleted, and all the objects that are related to  
     190        # the objects that are to be deleted 
     191        seen_objs = {} 
     192        for object in del_query: 
     193            object._collect_sub_objects(seen_objs) 
     194         
     195        # Delete the objects     
     196        delete_objects(seen_objs) 
     197         
     198        # Clear the result cache, in case this QuerySet gets reused. 
     199        self._result_cache = None 
     200    delete.alters_data = True 
     201         
    202202    ################################################## 
    203203    # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS # 
    204204    ################################################## 
     
    297297            self._result_cache = list(self.iterator()) 
    298298        return self._result_cache 
    299299 
    300     def _get_sql_clause(self, allow_joins): 
     300    def _get_sql_clause(self): 
    301301        opts = self.model._meta 
    302302 
    303303        # Construct the fundamental parts of the query: SELECT X FROM Y WHERE Z. 
     
    325325        # Start composing the body of the SQL statement. 
    326326        sql = [" FROM", backend.quote_name(opts.db_table)] 
    327327 
    328         # Check if extra tables are allowed. If not, throw an error 
    329         if (tables or joins) and not allow_joins: 
    330             raise TypeError, "Joins are not allowed in this type of query" 
    331  
    332328        # Compose the join dictionary into SQL describing the joins. 
    333329        if joins: 
    334330            sql.append(" ".join(["%s %s AS %s ON %s" % (join_type, table, alias, condition) 
     
    407403            field_names = [f.attname for f in self.model._meta.fields] 
    408404 
    409405        cursor = connection.cursor() 
    410         select, sql, params = self._get_sql_clause(True) 
     406        select, sql, params = self._get_sql_clause() 
    411407        select = ['%s.%s' % (backend.quote_name(self.model._meta.db_table), backend.quote_name(c)) for c in columns] 
    412408        cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params) 
    413409        while 1: 
     
    429425        if self._field.null: 
    430426            date_query._where.append('%s.%s IS NOT NULL' % \ 
    431427                (backend.quote_name(self.model._meta.db_table), backend.quote_name(self._field.column))) 
    432         select, sql, params = self._get_sql_clause(True) 
     428        select, sql, params = self._get_sql_clause() 
    433429        sql = 'SELECT %s %s GROUP BY 1 ORDER BY 1 %s' % \ 
    434430            (backend.get_date_trunc_sql(self._kind, '%s.%s' % (backend.quote_name(self.model._meta.db_table), 
    435431            backend.quote_name(self._field.column))), sql, self._order) 
     
    762758        params.extend(field.get_db_prep_lookup(clause, value)) 
    763759 
    764760    return tables, joins, where, params 
     761 
     762def compare_models(x, y): 
     763    "Comparator for Models that puts models in an order where dependencies are easily resolved." 
     764    for field in x._meta.fields: 
     765        if field.rel and not field.null and field.rel.to == y: 
     766            return -1 
     767    for field in y._meta.fields: 
     768        if field.rel and not field.null and field.rel.to == x: 
     769            return 1 
     770    return 0 
     771 
     772def delete_objects(seen_objs): 
     773    "Iterate through a list of seen classes, and remove any instances that are referred to" 
     774    seen_classes = set(seen_objs.keys()) 
     775    ordered_classes = list(seen_classes) 
     776    ordered_classes.sort(compare_models) 
     777 
     778    cursor = connection.cursor() 
     779      
     780    for cls in ordered_classes: 
     781        seen_objs[cls] = seen_objs[cls].items() 
     782        seen_objs[cls].sort() 
     783     
     784        # Pre notify all instances to be deleted 
     785        for pk_val, instance in seen_objs[cls]: 
     786            dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance) 
     787 
     788        pk_list = [pk for pk,instance in seen_objs[cls]] 
     789        for related in cls._meta.get_all_related_many_to_many_objects(): 
     790            cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \ 
     791                (backend.quote_name(related.field.get_m2m_db_table(related.opts)), 
     792                    backend.quote_name(cls._meta.object_name.lower() + '_id'), 
     793                    ','.join('%s' for pk in pk_list)),  
     794                pk_list) 
     795        for f in cls._meta.many_to_many: 
     796            cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \ 
     797                (backend.quote_name(f.get_m2m_db_table(cls._meta)), 
     798                    backend.quote_name(cls._meta.object_name.lower() + '_id'), 
     799                    ','.join(['%s' for pk in pk_list])),  
     800                pk_list) 
     801        for field in cls._meta.fields: 
     802            if field.rel and field.null and field.rel.to in seen_classes: 
     803                cursor.execute("UPDATE %s SET %s=NULL WHERE %s IN (%s)" % \ 
     804                    (backend.quote_name(cls._meta.db_table),  
     805                        backend.quote_name(field.column), 
     806                        backend.quote_name(cls._meta.pk.column),  
     807                        ','.join(['%s' for pk in pk_list])),  
     808                    pk_list) 
     809 
     810    # Now delete the actual data 
     811    for cls in ordered_classes: 
     812        seen_objs[cls].reverse() 
     813        pk_list = [pk for pk,instance in seen_objs[cls]] 
     814         
     815        cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \ 
     816            (backend.quote_name(cls._meta.db_table),  
     817                backend.quote_name(cls._meta.pk.column), 
     818                ','.join(['%s' for pk in pk_list])), 
     819            pk_list) 
     820                 
     821        # Last cleanup; set NULLs where there once was a reference to the object, 
     822        # NULL the primary key of the found objects, and perform post-notification. 
     823        for pk_val, instance in seen_objs[cls]: 
     824            for field in cls._meta.fields: 
     825                if field.rel and field.null and field.rel.to in seen_classes: 
     826                    setattr(instance, field.attname, None) 
     827 
     828            setattr(instance, cls._meta.pk.attname, None) 
     829            dispatcher.send(signal=signals.post_delete, sender=cls, instance=instance) 
     830 
     831    connection.commit() 
  • tests/modeltests/basic/models.py

    old new  
    99class Article(models.Model): 
    1010    headline = models.CharField(maxlength=100, default='Default headline') 
    1111    pub_date = models.DateTimeField() 
    12  
     12     
     13    def __repr__(self): 
     14        return self.headline 
    1315API_TESTS = """ 
    1416 
    1517# No articles are in the system yet. 
     
    3739>>> a.headline = 'Area woman programs in Python' 
    3840>>> a.save() 
    3941 
    40 # Article.objects.all() returns all the articles in the database. Note that 
    41 # the article is represented by "<Article object>", because we haven't given 
    42 # the Article model a __repr__() method. 
     42# Article.objects.all() returns all the articles in the database.  
    4343>>> Article.objects.all() 
    44 [<Article object>] 
     44[Area woman programs in Python] 
    4545 
    4646# Django provides a rich database lookup API. 
    4747>>> Article.objects.get(id__exact=1) 
    48 <Article object> 
     48Area woman programs in Python 
    4949>>> Article.objects.get(headline__startswith='Area woman') 
    50 <Article object> 
     50Area woman programs in Python 
    5151>>> Article.objects.get(pub_date__year=2005) 
    52 <Article object> 
     52Area woman programs in Python 
    5353>>> Article.objects.get(pub_date__year=2005, pub_date__month=7) 
    54 <Article object> 
     54Area woman programs in Python 
    5555>>> Article.objects.get(pub_date__year=2005, pub_date__month=7, pub_date__day=28) 
    56 <Article object> 
     56Area woman programs in Python 
    5757 
    5858# The "__exact" lookup type can be omitted, as a shortcut. 
    5959>>> Article.objects.get(id=1) 
    60 <Article object> 
     60Area woman programs in Python 
    6161>>> Article.objects.get(headline='Area woman programs in Python') 
    62 <Article object> 
     62Area woman programs in Python 
    6363 
    6464>>> Article.objects.filter(pub_date__year=2005) 
    65 [<Article object>] 
     65[Area woman programs in Python] 
    6666>>> Article.objects.filter(pub_date__year=2004) 
    6767[] 
    6868>>> Article.objects.filter(pub_date__year=2005, pub_date__month=7) 
    69 [<Article object>] 
     69[Area woman programs in Python] 
    7070 
    7171# Django raises an Article.DoesNotExist exception for get() if the parameters 
    7272# don't match any object. 
     
    8484# shortcut for primary-key exact lookups. 
    8585# The following is identical to articles.get(id=1). 
    8686>>> Article.objects.get(pk=1) 
    87 <Article object> 
     87Area woman programs in Python 
    8888 
    8989# Model instances of the same type and same ID are considered equal. 
    9090>>> a = Article.objects.get(pk=1) 
     
    234234 
    235235# You can get items using index and slice notation. 
    236236>>> Article.objects.all()[0] 
    237 <Article object> 
     237Area woman programs in Python 
    238238>>> Article.objects.all()[1:3] 
    239 [<Article object>, <Article object>] 
     239[Second article, Third article] 
    240240>>> s3 = Article.objects.filter(id__exact=3) 
    241241>>> (s1 | s2 | s3)[::2] 
    242 [<Article object>, <Article object>] 
     242[Area woman programs in Python, Third article] 
    243243 
    244244# An Article instance doesn't have access to the "objects" attribute. 
    245245# That's only available on the class. 
     
    254254AttributeError: Manager isn't accessible via Article instances 
    255255 
    256256# Bulk delete test: How many objects before and after the delete? 
    257 >>> Article.objects.count() 
    258 8L 
    259 >>> Article.objects.delete(id__lte=4) 
    260 >>> Article.objects.count() 
    261 4L 
     257>>> Article.objects.all() 
     258[Area woman programs in Python, Second article, Third article, Fourth article, Article 6, Default headline, Article 7, Updated article 8] 
     259>>> Article.objects.filter(id__lte=4).delete() 
     260>>> Article.objects.all() 
     261[Article 6, Default headline, Article 7, Updated article 8] 
    262262 
    263 >>> Article.objects.delete() 
    264 Traceback (most recent call last): 
    265     ... 
    266 TypeError: SAFETY MECHANISM: Specify DELETE_ALL=True if you actually want to delete all data. 
    267  
    268 >>> Article.objects.delete(DELETE_ALL=True) 
    269 >>> Article.objects.count() 
    270 0L 
    271  
    272263""" 
    273264 
    274265from django.conf import settings 
  • tests/modeltests/many_to_many/models.py

    old new  
    162162>>> p2.article_set.all().order_by('headline') 
    163163[Oxygen-free diet works wonders] 
    164164 
     165# Recreate the article and Publication we just deleted. 
     166>>> p1 = Publication(id=None, title='The Python Journal') 
     167>>> p1.save() 
     168>>> a2 = Article(id=None, headline='NASA uses Python') 
     169>>> a2.save() 
     170>>> a2.publications.add(p1, p2, p3) 
    165171 
     172# Bulk delete some Publications - references to deleted publications should go 
     173>>> Publication.objects.filter(title__startswith='Science').delete() 
     174>>> Publication.objects.all() 
     175[Highlights for Children, The Python Journal] 
     176>>> Article.objects.all() 
     177[Django lets you build Web apps easily, NASA finds intelligent life on Earth, Oxygen-free diet works wonders, NASA uses Python] 
     178>>> a2.publications.all() 
     179[The Python Journal] 
    166180 
     181# Bulk delete some articles - references to deleted objects should go 
     182>>> q = Article.objects.filter(headline__startswith='Django') 
     183>>> print q 
     184[Django lets you build Web apps easily] 
     185>>> q.delete() 
     186 
     187# After the delete, the QuerySet cache needs to be cleared, and the referenced objects should be gone 
     188>>> print q 
     189[] 
     190>>> p1.article_set.all() 
     191[NASA uses Python] 
     192 
     193 
    167194""" 
  • tests/modeltests/many_to_one/models.py

    old new  
    9494 
    9595# The underlying query only makes one join when a related table is referenced twice. 
    9696>>> query = Article.objects.filter(reporter__first_name__exact='John', reporter__last_name__exact='Smith') 
    97 >>> null, sql, null = query._get_sql_clause(True) 
     97>>> null, sql, null = query._get_sql_clause() 
    9898>>> sql.count('INNER JOIN') 
    99991 
    100100 
     
    163163>>> Reporter.objects.filter(article__reporter__first_name__startswith='John').distinct() 
    164164[John Smith] 
    165165 
    166 # Deletes that require joins are prohibited. 
    167 >>> Article.objects.delete(reporter__first_name__startswith='Jo') 
    168 Traceback (most recent call last): 
    169     ... 
    170 TypeError: Joins are not allowed in this type of query 
    171  
    172166# If you delete a reporter, his articles will be deleted. 
    173167>>> Article.objects.order_by('headline') 
    174168[John's second story, Paul's story, This is a test, This is a test, This is a test] 
    175169>>> Reporter.objects.order_by('first_name') 
    176170[John Smith, Paul Jones] 
    177 >>> r.delete() 
     171>>> r2.delete() 
    178172>>> Article.objects.order_by('headline') 
    179 [Paul's story] 
     173[John's second story, This is a test, This is a test, This is a test] 
    180174>>> Reporter.objects.order_by('first_name') 
    181 [Paul Jones] 
     175[John Smith] 
    182176 
     177# Deletes using a join in the query 
     178>>> Reporter.objects.filter(article__headline__startswith='This').delete() 
     179>>> Reporter.objects.all() 
     180[] 
     181>>> Article.objects.all() 
     182[] 
     183 
    183184"""