Ticket #17788: ticket_17788.diff

File ticket_17788.diff, 13.1 KB (added by Anssi Kääriäinen, 13 years ago)
  • django/db/backends/__init__.py

    diff --git a/django/db/backends/__init__.py b/django/db/backends/__init__.py
    index a85e7b9..8ca0fed 100644
    a b class BaseDatabaseOperations(object):  
    469469        This SQL is executed when a table is created.
    470470        """
    471471        return None
     472   
     473    def bulk_batch_size(self, fields, objs):
     474        """
     475        Returns the maximum allowed batch size for the backend. The fields
     476        are the fields we will be inserting in the batch, and objs contains
     477        all the objects to be inserted.
     478
     479        The default return value of None means unlimited. This method must
     480        always return a value greater than 0.
     481        """
     482        return None
    472483
    473484    def date_extract_sql(self, lookup_type, field_name):
    474485        """
    class BaseDatabaseOperations(object):  
    506517        during a CREATE TABLE statement.
    507518        """
    508519        return ''
     520   
     521    def distinct_sql(self, fields):
     522        """
     523        Returns an SQL DISTINCT clause which removes duplicate rows from the
     524        result set. If any fields are given, only the given fields are being
     525        checked for duplicates.
     526        """
     527        if fields:
     528            raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
     529        else:
     530            return 'DISTINCT'
    509531
    510532    def drop_foreignkey_sql(self):
    511533        """
    class BaseDatabaseOperations(object):  
    562584        """
    563585        raise NotImplementedError('Full-text search is not implemented for this database backend')
    564586
    565     def distinct_sql(self, fields):
    566         """
    567         Returns an SQL DISTINCT clause which removes duplicate rows from the
    568         result set. If any fields are given, only the given fields are being
    569         checked for duplicates.
    570         """
    571         if fields:
    572             raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
    573         else:
    574             return 'DISTINCT'
    575 
    576587    def last_executed_query(self, cursor, sql, params):
    577588        """
    578589        Returns a string of the query last executed by the given cursor, with
  • django/db/backends/sqlite3/base.py

    diff --git a/django/db/backends/sqlite3/base.py b/django/db/backends/sqlite3/base.py
    index 8f883e2..e4e5c52 100644
    a b class DatabaseFeatures(BaseDatabaseFeatures):  
    8383    supports_1000_query_parameters = False
    8484    supports_mixed_date_datetime_comparisons = False
    8585    has_bulk_insert = True
    86     can_combine_inserts_with_and_without_auto_increment_pk = True
     86    can_combine_inserts_with_and_without_auto_increment_pk = False
    8787
    8888    def _supports_stddev(self):
    8989        """Confirm support for STDDEV and related stats functions
    class DatabaseFeatures(BaseDatabaseFeatures):  
    104104        return has_support
    105105
    106106class DatabaseOperations(BaseDatabaseOperations):
     107    def bulk_batch_size(self, fields, objs):
     108        """
     109        SQLite has a default limit of 999 variables per query.
     110        """
     111        return max(999 / len(fields), 1)
     112
    107113    def date_extract_sql(self, lookup_type, field_name):
    108114        # sqlite doesn't support extract, so we fake it with the user-defined
    109115        # function django_extract that's registered in connect(). Note that
  • django/db/models/query.py

    diff --git a/django/db/models/query.py b/django/db/models/query.py
    index 3d4fd17..5fa3fbc 100644
    a b class QuerySet(object):  
    382382        obj.save(force_insert=True, using=self.db)
    383383        return obj
    384384
    385     def bulk_create(self, objs):
     385    def bulk_create(self, objs, batch_size=None):
    386386        """
    387387        Inserts each of the instances into the database. This does *not* call
    388388        save() on each of the instances, does not send any pre/post save
    class QuerySet(object):  
    395395        # this could be implemented if you didn't have an autoincrement pk,
    396396        # and 2) you could do it by doing O(n) normal inserts into the parent
    397397        # tables to get the primary keys back, and then doing a single bulk
    398         # insert into the childmost table. We're punting on these for now
    399         # because they are relatively rare cases.
     398        # insert into the childmost table. Some databases might allow doing
     399        # this by using RETURNING clause for the insert query. We're punting
     400        # on these for now because they are relatively rare cases.
     401        assert batch_size is None or batch_size > 0
    400402        if self.model._meta.parents:
    401403            raise ValueError("Can't bulk create an inherited model")
    402404        if not objs:
    class QuerySet(object):  
    412414        try:
    413415            if (connection.features.can_combine_inserts_with_and_without_auto_increment_pk
    414416                and self.model._meta.has_auto_field):
    415                 self.model._base_manager._insert(objs, fields=fields, using=self.db)
     417                self._batched_insert(objs, fields, batch_size)
    416418            else:
    417419                objs_with_pk, objs_without_pk = partition(lambda o: o.pk is None, objs)
    418420                if objs_with_pk:
    419                     self.model._base_manager._insert(objs_with_pk, fields=fields, using=self.db)
     421                    self._batched_insert(objs_with_pk, fields, batch_size)
    420422                if objs_without_pk:
    421                     self.model._base_manager._insert(objs_without_pk, fields=[f for f in fields if not isinstance(f, AutoField)], using=self.db)
     423                    fields= [f for f in fields if not isinstance(f, AutoField)]
     424                    self._batched_insert(objs_without_pk, fields, batch_size)
    422425            if forced_managed:
    423426                transaction.commit(using=self.db)
    424427            else:
    class QuerySet(object):  
    854857    ###################
    855858    # PRIVATE METHODS #
    856859    ###################
     860    def _batched_insert(self, objs, fields, def_batch_size):
     861        """
     862        A little helper method for bulk_create() to insert the bulk one batch
     863        at a time. Recursively inserts a batch from the front of the bulk and
     864        then calls _batched_insert() again on the remaining objects.
     865        """
     866        if not objs:
     867            return
     868        connection = connections[self.db]
     869        batch_size = (def_batch_size
     870                      or connection.ops.bulk_batch_size(fields, objs))
     871        batch, remaining = ((objs[0:batch_size], objs[batch_size:])
     872                            if batch_size else (objs, []))
     873        self.model._base_manager._insert(batch, fields=fields,
     874                                         using=self.db)
     875        self._batched_insert(remaining, fields, def_batch_size)
    857876
    858877    def _clone(self, klass=None, setup=False, **kwargs):
    859878        if klass is None:
  • docs/ref/models/querysets.txt

    diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
    index b20c6e3..991e0db 100644
    a b has a side effect on your data. For more, see `Safe methods`_ in the HTTP spec.  
    13491349bulk_create
    13501350~~~~~~~~~~~
    13511351
    1352 .. method:: bulk_create(objs)
     1352.. method:: bulk_create(objs, batch_size=None)
    13531353
    13541354.. versionadded:: 1.4
    13551355
    This has a number of caveats though:  
    13711371* If the model's primary key is an :class:`~django.db.models.AutoField` it
    13721372  does not retrieve and set the primary key attribute, as ``save()`` does.
    13731373
    1374 .. admonition:: Limits of SQLite
     1374The ``batch_size`` parameter controls how many objects are created in a single
     1375query. The default is to create all objects in one batch, except for SQLite,
     1376where the default is such that at most 999 variables per query are used.
    13751377
    1376     SQLite sets a limit on the number of parameters per SQL statement. The
    1377     maximum is defined by the SQLITE_MAX_VARIABLE_NUMBER_ compilation option,
    1378     which defaults to 999. For instance, if your model has 8 fields (including
    1379     the primary key), you cannot create more than 999 // 8 = 124 instances at
    1380     a time. If you exceed this limit, you'll get an exception::
    1381 
    1382         django.db.utils.DatabaseError: too many SQL variables
    1383 
    1384     If your application's performance requirements exceed SQLite's limits, you
    1385     should switch to another database engine, such as PostgreSQL.
    1386 
    1387 .. _SQLITE_MAX_VARIABLE_NUMBER: http://sqlite.org/limits.html#max_variable_number
     1378.. versionadded:: 1.5
     1379    The ``batch_size`` parameter was added in version 1.5.
    13881380
    13891381count
    13901382~~~~~
  • docs/releases/1.5.txt

    diff --git a/docs/releases/1.5.txt b/docs/releases/1.5.txt
    index 4d9572d..0466867 100644
    a b Django 1.5 also includes several smaller improvements worth noting:  
    4141* The template engine now interprets ``True``, ``False`` and ``None`` as the
    4242  corresponding Python objects.
    4343
     44* :meth:`QuerySet.bulk_create()
     45  <django.db.models.query.QuerySet.bulk_create>` now has a ``batch_size``
     46  argument. By default the batch size is unlimited, except for SQLite, where
     47  a single batch is limited so that 999 parameters per query isn't exceeded.
     48
    4449Backwards incompatible changes in 1.5
    4550=====================================
    4651
  • tests/regressiontests/bulk_create/models.py

    diff --git a/tests/regressiontests/bulk_create/models.py b/tests/regressiontests/bulk_create/models.py
    index a4c611d..bc685bb 100644
    a b class Pizzeria(Restaurant):  
    1818    pass
    1919
    2020class State(models.Model):
    21     two_letter_code = models.CharField(max_length=2, primary_key=True)
    22  No newline at end of file
     21    two_letter_code = models.CharField(max_length=2, primary_key=True)
     22
     23class TwoFields(models.Model):
     24    f1 = models.IntegerField(unique=True)
     25    f2 = models.IntegerField(unique=True)
  • tests/regressiontests/bulk_create/tests.py

    diff --git a/tests/regressiontests/bulk_create/tests.py b/tests/regressiontests/bulk_create/tests.py
    index 0b55f63..e332335 100644
    a b from __future__ import absolute_import  
    22
    33from operator import attrgetter
    44
    5 from django.test import TestCase, skipIfDBFeature, skipUnlessDBFeature
     5from django.db import connection
     6from django.test import TestCase, skipIfDBFeature
     7from django.test.utils import override_settings
    68
    7 from .models import Country, Restaurant, Pizzeria, State
     9from .models import Country, Restaurant, Pizzeria, State, TwoFields
    810
    911
    1012class BulkCreateTests(TestCase):
    class BulkCreateTests(TestCase):  
    2729        self.assertEqual(created, [])
    2830        self.assertEqual(Country.objects.count(), 4)
    2931
    30     @skipUnlessDBFeature("has_bulk_insert")
    3132    def test_efficiency(self):
    3233        with self.assertNumQueries(1):
    3334            Country.objects.bulk_create(self.data)
    class BulkCreateTests(TestCase):  
    6970        invalid_country = Country(id=0, name='Poland', iso_two_letter='PL')
    7071        with self.assertRaises(ValueError):
    7172            Country.objects.bulk_create([valid_country, invalid_country])
     73
     74    def test_large_batch(self):
     75        with override_settings(DEBUG=True):
     76            connection.queries = []
     77            TwoFields.objects.bulk_create([
     78                   TwoFields(f1=i, f2=i+1) for i in range(0, 1001)
     79                ])
     80            self.assertTrue(len(connection.queries) < 10)
     81        self.assertEqual(TwoFields.objects.count(), 1001)
     82        self.assertEqual(
     83            TwoFields.objects.filter(f1__gte=450, f1__lte=550).count(),
     84            101)
     85        self.assertEqual(TwoFields.objects.filter(f2__gte=901).count(), 101)
     86
     87    def test_large_batch_mixed(self):
     88        """
     89        Test inserting a large batch with objects having primary key set
     90        mixed together with objects without PK set.
     91        """
     92        with override_settings(DEBUG=True):
     93            connection.queries = []
     94            TwoFields.objects.bulk_create([
     95                TwoFields(id=i if i % 2 == 0 else None, f1=i, f2=i+1)
     96                for i in range(100000, 101000)])
     97            self.assertTrue(len(connection.queries) < 10)
     98        self.assertEqual(TwoFields.objects.count(), 1000)
     99        # We can't assume much about the IDs created, except that the
     100        # explicitly assigned IDs above must exist.
     101        id_range = range(100000, 101000, 2)
     102        self.assertEqual(TwoFields.objects.filter(id__in=id_range).count(), 500)
     103
     104    def test_explicit_batch_size(self):
     105        objs = [TwoFields(f1=i, f2=i) for i in range(0, 100)]
     106        with self.assertNumQueries(2):
     107            TwoFields.objects.bulk_create(objs, 50)
     108        TwoFields.objects.all().delete()
     109        with self.assertNumQueries(1):
     110            TwoFields.objects.bulk_create(objs, len(objs))
  • tests/regressiontests/queries/tests.py

    diff --git a/tests/regressiontests/queries/tests.py b/tests/regressiontests/queries/tests.py
    index ded3e8f..ed71be8 100644
    a b class ConditionalTests(BaseQuerysetTest):  
    18071807        # Test that the "in" lookup works with lists of 1000 items or more.
    18081808        Number.objects.all().delete()
    18091809        numbers = range(2500)
    1810         for num in numbers:
    1811             _ = Number.objects.create(num=num)
     1810        Number.objects.bulk_create(Number(num=num) for num in numbers)
    18121811        self.assertEqual(
    18131812            Number.objects.filter(num__in=numbers[:1000]).count(),
    18141813            1000
Back to Top