Ticket #28668: on_conflict_postgresql_signals_ids.patch
File on_conflict_postgresql_signals_ids.patch, 13.3 KB (added by , 7 years ago) |
---|
-
new file django/db/backends/postgresql/compiler.py
diff --git a/django/db/backends/postgresql/compiler.py b/django/db/backends/postgresql/compiler.py new file mode 100644 index 0000000..a562e34
- + 1 from django.db.models.sql import compiler 2 3 4 SQLCompiler = compiler.SQLCompiler 5 SQLDeleteCompiler = compiler.SQLDeleteCompiler 6 SQLUpdateCompiler = compiler.SQLUpdateCompiler 7 SQLAggregateCompiler = compiler.SQLAggregateCompiler 8 9 10 class SQLInsertCompiler(compiler.SQLInsertCompiler): 11 def as_sql(self): 12 """ 13 Create queries that work like "INSERT INTO .. ON CONFLICT DO NOTHING RETURNING *" 14 but return the same amount of rows as in the input, setting NULL on already existing 15 rows. The cited query does not return anything for rows that were already in the 16 database. The drawback is that the pg-sequence counter increases every time with 17 the number of rows in the input, irrespective of the actually inserted rows. 18 Works only with PostgreSQL >= 9.5. 19 """ 20 fields = self.query.fields 21 if fields and self.connection.pg_version >= 90500 and getattr(self.query, 'on_conflict', '') == 'ignore': 22 qn = self.quote_name_unless_alias 23 opts = self.query.get_meta() 24 return [("WITH r AS (SELECT * FROM(VALUES (" + "),(".join( 25 ",".join("%s" for f in fields) for obj in self.query.objs 26 ) + ")) AS g(" + ",".join(qn(field.column) for field in fields) + "))," + 27 " s AS (INSERT INTO " + qn(opts.db_table) + " (" + ", ".join( 28 qn(field.column) for field in fields) + 29 ") SELECT * FROM r ON CONFLICT DO NOTHING RETURNING *) SELECT s." + 30 qn(opts.pk.column) + " FROM r LEFT JOIN s USING (" + ", ".join( 31 qn(field.column) for field in fields) + ")", 32 tuple(p for ps in self.assemble_as_sql(fields, [ 33 [self.prepare_value(field, self.pre_save_val( 34 field, obj)) for field in fields] for obj in self.query.objs 35 ])[1] for p in ps))] 36 return super().as_sql() -
django/db/backends/postgresql/operations.py
diff --git a/django/db/backends/postgresql/operations.py b/django/db/backends/postgresql/operations.py index 6f48cfa..b698b50 100644
a b from django.db.backends.base.operations import BaseDatabaseOperations 7 7 8 8 class DatabaseOperations(BaseDatabaseOperations): 9 9 cast_char_field_without_max_length = 'varchar' 10 compiler_module = "django.db.backends.postgresql.compiler" 10 11 11 12 def unification_cast_sql(self, output_field): 12 13 internal_type = output_field.get_internal_type() -
django/db/models/query.py
diff --git a/django/db/models/query.py b/django/db/models/query.py index 71ebf66..7c571a2 100644
a b 1 1 """ 2 2 The main QuerySet implementation. This provides the public API for the ORM. 3 3 """ 4 5 4 import copy 5 import functools 6 6 import operator 7 7 import warnings 8 8 from collections import OrderedDict, namedtuple … … from functools import lru_cache 10 10 11 11 from django.conf import settings 12 12 from django.core import exceptions 13 from django.contrib.postgres.fields import CIText 13 14 from django.db import ( 14 15 DJANGO_VERSION_PICKLE_KEY, IntegrityError, connections, router, 15 16 transaction, 16 17 ) 17 from django.db.models import DateField, DateTimeField, s ql18 from django.db.models import DateField, DateTimeField, signals, sql 18 19 from django.db.models.constants import LOOKUP_SEP 19 20 from django.db.models.deletion import Collector 20 21 from django.db.models.expressions import F … … class QuerySet: 417 418 if obj.pk is None: 418 419 obj.pk = obj._meta.pk.get_pk_value_on_save(obj) 419 420 420 def bulk_create(self, objs, batch_size=None ):421 def bulk_create(self, objs, batch_size=None, on_conflict=None, send_signal=True, all_ids=False): 421 422 """ 422 423 Insert each of the instances into the database. Do *not* call 423 save() on each of the instances, do not send any pre /post_save424 save() on each of the instances, do not send any pre_save 424 425 signals, and do not set the primary key attribute if it is an 425 426 autoincrement field (except if features.can_return_ids_from_bulk_insert=True). 426 427 Multi-table models are not supported. 428 429 With postgresql >= 9.5: 430 * It is possible for objs to contain both instances contained in the database and 431 new instances. However the underlying PG sequence is incremented unnecessarily 432 for each object that was already in the database. 433 * post save signals are sent to the new instances if send_signal is set. 434 * If all_ids is True, a second query is sent to the database which retrieves 435 the IDs of objs, which existed prior to calling bulk_create. 
The query matches all 436 provided fields of the supplied objs. 437 * If all_ids is a list or a tuple, all fields mentioned in that list are ignored in the 438 latter call, when considering objects for equality. 427 439 """ 428 440 # When you bulk insert you don't get the primary keys back (if it's an 429 441 # autoincrement, except if can_return_ids_from_bulk_insert=True), so … … class QuerySet: 445 457 for parent in self.model._meta.get_parent_list(): 446 458 if parent._meta.concrete_model is not self.model._meta.concrete_model: 447 459 raise ValueError("Can't bulk create a multi-table inherited model") 460 if on_conflict and on_conflict.lower() != 'ignore': 461 raise ValueError("'%s' is an invalid value for on_conflict. Allowed values: 'ignore'" % on_conflict) 448 462 if not objs: 449 463 return objs 450 464 self._for_write = True … … class QuerySet: 455 469 with transaction.atomic(using=self.db, savepoint=False): 456 470 objs_with_pk, objs_without_pk = partition(lambda o: o.pk is None, objs) 457 471 if objs_with_pk: 458 self._batched_insert(objs_with_pk, fields, batch_size )472 self._batched_insert(objs_with_pk, fields, batch_size, on_conflict=on_conflict) 459 473 if objs_without_pk: 460 474 fields = [f for f in fields if not isinstance(f, AutoField)] 461 ids = self._batched_insert(objs_without_pk, fields, batch_size )475 ids = self._batched_insert(objs_without_pk, fields, batch_size, on_conflict=on_conflict) 462 476 if connection.features.can_return_ids_from_bulk_insert: 463 477 assert len(ids) == len(objs_without_pk) 464 478 for obj_without_pk, pk in zip(objs_without_pk, ids): … … class QuerySet: 466 480 obj_without_pk._state.adding = False 467 481 obj_without_pk._state.db = self.db 468 482 483 if (send_signal or all_ids) and connection.features.can_return_ids_from_bulk_insert: 484 objs_with_pk, objs_without_pk = partition(lambda o: o.pk is None, objs_without_pk) 485 if send_signal: 486 for obj in objs_with_pk: 487 signals.post_save.send(sender=obj.__class__, 
instance=obj, created=True, using=self.db) 488 489 if all_ids and objs_without_pk and getattr(connection, 'pg_version', 0) >= 90500: 490 all_ids = [] if all_ids is True else all_ids 491 # f.attname in obj.__dict__ and f!= obj._meta.pk means the field is not deferred and is not primary key 492 obj0 = objs_without_pk[0] 493 fields = [f.attname for f in obj0._meta.concrete_fields if f.attname 494 in obj0.__dict__ and f != obj0._meta.pk and f.attname not in all_ids] 495 q = [Q(**{f.attname: getattr(obj, f.attname) for f in obj._meta.concrete_fields if f.attname in 496 obj.__dict__ and f != obj._meta.pk and f.attname not in all_ids}) for obj in objs_without_pk] 497 if q: 498 output = self.filter(functools.reduce(Q.__or__, q)).values(*fields, obj0._meta.pk.attname) 499 for obj in objs_without_pk: 500 for o in output: 501 if all((getattr(obj, f).lower() == o[f].lower()) if isinstance( 502 obj._meta.get_field(f), CIText) else (getattr(obj, f) == o[f]) for f in fields): 503 obj.pk = o[obj0._meta.pk.attname] 504 break 469 505 return objs 470 506 471 507 def get_or_create(self, defaults=None, **kwargs): … … class QuerySet: 1108 1144 # PRIVATE METHODS # 1109 1145 ################### 1110 1146 1111 def _insert(self, objs, fields, return_id=False, raw=False, using=None ):1147 def _insert(self, objs, fields, return_id=False, raw=False, using=None, on_conflict=None): 1112 1148 """ 1113 1149 Insert a new record for the given model. This provides an interface to 1114 1150 the InsertQuery class and is how Model.save() is implemented. 
… … class QuerySet: 1117 1153 if using is None: 1118 1154 using = self.db 1119 1155 query = sql.InsertQuery(self.model) 1156 if on_conflict: 1157 query.on_conflict = on_conflict.lower() 1120 1158 query.insert_values(fields, objs, raw=raw) 1121 1159 return query.get_compiler(using=using).execute_sql(return_id) 1122 1160 _insert.alters_data = True 1123 1161 _insert.queryset_only = False 1124 1162 1125 def _batched_insert(self, objs, fields, batch_size ):1163 def _batched_insert(self, objs, fields, batch_size, on_conflict=None): 1126 1164 """ 1127 1165 Helper method for bulk_create() to insert objs one batch at a time. 1128 1166 """ … … class QuerySet: 1131 1169 inserted_ids = [] 1132 1170 for item in [objs[i:i + batch_size] for i in range(0, len(objs), batch_size)]: 1133 1171 if connections[self.db].features.can_return_ids_from_bulk_insert: 1134 inserted_id = self._insert(item, fields=fields, using=self.db, return_id=True )1172 inserted_id = self._insert(item, fields=fields, using=self.db, return_id=True, on_conflict=on_conflict) 1135 1173 if isinstance(inserted_id, list): 1136 1174 inserted_ids.extend(inserted_id) 1137 1175 else: -
docs/ref/models/querysets.txt
diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt index f2abf1c..08167d9 100644
a b exists in the database, an :exc:`~django.db.IntegrityError` is raised. 1995 1995 ``bulk_create()`` 1996 1996 ~~~~~~~~~~~~~~~~~ 1997 1997 1998 .. method:: bulk_create(objs, batch_size=None )1998 .. method:: bulk_create(objs, batch_size=None, on_conflict=None, send_signal=True, all_ids=False) 1999 1999 2000 2000 This method inserts the provided list of objects into the database in an 2001 2001 efficient manner (generally only 1 query, no matter how many objects there … … are):: 2009 2009 This has a number of caveats though: 2010 2010 2011 2011 * The model's ``save()`` method will not be called, and the ``pre_save`` and 2012 ``post_save`` signals will not be sent. 2012 signal will not be sent. 2013 * If PostgreSQL is used and ``send_signal`` is True, the ``post_save`` signal will be sent. 2013 2014 * It does not work with child models in a multi-table inheritance scenario. 2014 2015 * If the model's primary key is an :class:`~django.db.models.AutoField` it 2015 2016 does not retrieve and set the primary key attribute, as ``save()`` does, … … The ``batch_size`` parameter controls how many objects are created in a single 2035 2036 query. The default is to create all objects in one batch, except for SQLite 2036 2037 where the default is such that at most 999 variables per query are used. 2037 2038 2039 If PostgreSQL >= 9.5 and `on_conflict='ignore'` are used, contrary to the above statements: 2040 2041 * It is possible for objs to contain both instances, contained in the database 2042 prior to the call, and new instances. The underlying PostgreSQL sequence 2043 is incremented for each object that was already in the database. 2044 * If `all_ids` is True, a second query is sent to the database which retrieves the IDs 2045 of those objs, that existed prior to calling ``bulk_create()``. The query matches all 2046 provided fields of the supplied objs. 
2047 * If `all_ids` is a non-empty list, all fields mentioned in that list are 2048 ignored in the latter query, when considering objects for equality:: 2049 2050 >>> from django.db import models 2051 2052 >>> class T(models.Model): 2053 ... d = models.DateTimeField(default=django.utils.timezone.now) 2054 ... n = models.IntegerField(unique=True) 2055 2056 >>> T.objects.bulk_create([T(n=1), T(n=1)], on_conflict='ignore', all_ids=True) 2057 # Now the database contains one object with n=1 and a timestamp when the first 2058 # constructor was called. The returned list has two objects, and the second object 2059 # has no pk set. The cause is that the second T(n=1) has d with a timestamp that 2060 # is different from the timestamp of the first T(n=1), and querying the database 2061 # for the second T-object returned no results. Even if the second object is not 2062 # inserted into the database, but only the first one, the corresponding Postgresql 2063 # sequence is increased by two 2064 2065 >>> T.objects.bulk_create([T(n=1), T(n=1)], on_conflict='ignore', all_ids=['d']) 2066 # Now the database will check if there is an object with n=1 and ignore the d field. 2067 # The pk field of each element in the list will be set. The Postgresql sequence 2068 # is increased by two. 2069 2038 2070 ``count()`` 2039 2071 ~~~~~~~~~~~ 2040 2072