Ticket #28668: on_conflict_postgresql_signals_ids.patch

File on_conflict_postgresql_signals_ids.patch, 13.3 KB (added by Дилян Палаузов, 6 years ago)

In addition to the other file, this one adds support for sending post_save for the newly created objects and for retrieving the ids of those objects passed to bulk_create that already existed in the database.
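A minimal usage sketch of the new parameters, assuming the patch is applied and a PostgreSQL >= 9.5 database; the Tag model, the myapp module and the receiver are hypothetical and only illustrate on_conflict, send_signal and all_ids:

    from django.db.models.signals import post_save
    from django.dispatch import receiver

    from myapp.models import Tag   # hypothetical model with a unique "name" field


    @receiver(post_save, sender=Tag)
    def tag_created(sender, instance, created, **kwargs):
        # Fired once per row that bulk_create() actually inserted, because
        # send_signal defaults to True and PostgreSQL can return the new ids.
        print("created", instance.pk, instance.name)


    # Rows whose unique values already exist are skipped instead of raising
    # IntegrityError; with all_ids=True a follow-up query fills in the pk of
    # the pre-existing rows as well, so the returned objects carry their pk
    # (as long as all provided field values match the stored row).
    tags = Tag.objects.bulk_create(
        [Tag(name="red"), Tag(name="green")],
        on_conflict='ignore', send_signal=True, all_ids=True,
    )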

  • new file django/db/backends/postgresql/compiler.py

    diff --git a/django/db/backends/postgresql/compiler.py b/django/db/backends/postgresql/compiler.py
    new file mode 100644
    index 0000000..a562e34
    - +  
     1from django.db.models.sql import compiler
     2
     3
     4SQLCompiler = compiler.SQLCompiler
     5SQLDeleteCompiler = compiler.SQLDeleteCompiler
     6SQLUpdateCompiler = compiler.SQLUpdateCompiler
     7SQLAggregateCompiler = compiler.SQLAggregateCompiler
     8
     9
     10class SQLInsertCompiler(compiler.SQLInsertCompiler):
     11    def as_sql(self):
     12        """
      13        Create queries that work like "INSERT INTO .. ON CONFLICT DO NOTHING RETURNING *"
      14        but return the same number of rows as the input, yielding NULL for rows that
      15        already existed.  The cited query on its own returns nothing for rows that were
      16        already in the database.  The drawback is that the pg-sequence counter increases
      17        every time by the number of input rows, irrespective of how many rows were actually inserted.
     18        Works only with PostgreSQL >= 9.5.
     19        """
     20        fields = self.query.fields
     21        if fields and self.connection.pg_version >= 90500 and getattr(self.query, 'on_conflict', '') == 'ignore':
     22            qn = self.quote_name_unless_alias
     23            opts = self.query.get_meta()
     24            return [("WITH r AS (SELECT * FROM(VALUES (" + "),(".join(
     25                ",".join("%s" for f in fields) for obj in self.query.objs
     26            ) + ")) AS g(" + ",".join(qn(field.column) for field in fields) + "))," +
     27                " s AS (INSERT INTO " + qn(opts.db_table) + " (" + ", ".join(
     28                    qn(field.column) for field in fields) +
     29                ") SELECT * FROM r ON CONFLICT DO NOTHING RETURNING *) SELECT s." +
     30                qn(opts.pk.column) + " FROM r LEFT JOIN s USING (" + ", ".join(
     31                    qn(field.column) for field in fields) + ")",
     32                tuple(p for ps in self.assemble_as_sql(fields, [
     33                    [self.prepare_value(field, self.pre_save_val(
     34                        field, obj)) for field in fields] for obj in self.query.objs
     35                ])[1] for p in ps))]
     36        return super().as_sql()
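
    For illustration, the query built above has roughly the following shape. This is a hedged sketch, not the compiler's exact output: the table t_demo(id, name, num) and the values are invented, and the real compiler quotes identifiers and assembles the parameters via assemble_as_sql(). Rows that already existed come back as NULL, which is how the caller distinguishes skipped rows from inserted ones:

      from django.db import connection

      sql = (
          "WITH r AS (SELECT * FROM (VALUES (%s,%s),(%s,%s)) AS g(name, num)), "
          "s AS (INSERT INTO t_demo (name, num) SELECT * FROM r "
          "ON CONFLICT DO NOTHING RETURNING *) "
          "SELECT s.id FROM r LEFT JOIN s USING (name, num)"
      )
      with connection.cursor() as cursor:
          cursor.execute(sql, ["a", 1, "b", 2])
          # One row per input value pair; pairs that already existed yield (None,).
          print(cursor.fetchall())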
  • django/db/backends/postgresql/operations.py

    diff --git a/django/db/backends/postgresql/operations.py b/django/db/backends/postgresql/operations.py
    index 6f48cfa..b698b50 100644
    a b from django.db.backends.base.operations import BaseDatabaseOperations  
    77
    88class DatabaseOperations(BaseDatabaseOperations):
    99    cast_char_field_without_max_length = 'varchar'
     10    compiler_module = "django.db.backends.postgresql.compiler"
    1011
    1112    def unification_cast_sql(self, output_field):
    1213        internal_type = output_field.get_internal_type()
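
    The only change here is pointing compiler_module at the new module. A quick way to check that the backend now resolves its own SQLInsertCompiler, assuming the patch is applied and 'default' is a PostgreSQL connection:

      from django.db import connection

      insert_compiler = connection.ops.compiler('SQLInsertCompiler')
      # With the patch applied this prints "django.db.backends.postgresql.compiler".
      print(insert_compiler.__module__)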
  • django/db/models/query.py

    diff --git a/django/db/models/query.py b/django/db/models/query.py
    index 71ebf66..7c571a2 100644
    a b  
    11"""
    22The main QuerySet implementation. This provides the public API for the ORM.
    33"""
    4 
    54import copy
     5import functools
    66import operator
    77import warnings
    88from collections import OrderedDict, namedtuple
    from functools import lru_cache  
    1010
    1111from django.conf import settings
    1212from django.core import exceptions
     13from django.contrib.postgres.fields import CIText
    1314from django.db import (
    1415    DJANGO_VERSION_PICKLE_KEY, IntegrityError, connections, router,
    1516    transaction,
    1617)
    17 from django.db.models import DateField, DateTimeField, sql
     18from django.db.models import DateField, DateTimeField, signals, sql
    1819from django.db.models.constants import LOOKUP_SEP
    1920from django.db.models.deletion import Collector
    2021from django.db.models.expressions import F
    class QuerySet:  
    417418            if obj.pk is None:
    418419                obj.pk = obj._meta.pk.get_pk_value_on_save(obj)
    419420
    420     def bulk_create(self, objs, batch_size=None):
     421    def bulk_create(self, objs, batch_size=None, on_conflict=None, send_signal=True, all_ids=False):
    421422        """
    422423        Insert each of the instances into the database. Do *not* call
    423         save() on each of the instances, do not send any pre/post_save
     424        save() on each of the instances, do not send any pre_save
    424425        signals, and do not set the primary key attribute if it is an
    425426        autoincrement field (except if features.can_return_ids_from_bulk_insert=True).
    426427        Multi-table models are not supported.
     428
      429        With PostgreSQL >= 9.5:
      430          * objs may contain both instances that already exist in the database and new
      431            instances.  However, the underlying PG sequence is incremented unnecessarily
      432            for each object that was already in the database.
      433          * post_save signals are sent for the newly created instances if send_signal is set.
      434          * If all_ids is True, a second query is sent to the database which retrieves
      435            the IDs of those objs that existed prior to calling bulk_create.  The query
      436            matches all provided fields of the supplied objs.
      437          * If all_ids is a list or a tuple, the fields mentioned in that list are ignored
      438            in that second query when comparing objects for equality.
    427439        """
    428440        # When you bulk insert you don't get the primary keys back (if it's an
    429441        # autoincrement, except if can_return_ids_from_bulk_insert=True), so
    class QuerySet:  
    445457        for parent in self.model._meta.get_parent_list():
    446458            if parent._meta.concrete_model is not self.model._meta.concrete_model:
    447459                raise ValueError("Can't bulk create a multi-table inherited model")
     460        if on_conflict and on_conflict.lower() != 'ignore':
     461            raise ValueError("'%s' is an invalid value for on_conflict. Allowed values: 'ignore'" % on_conflict)
    448462        if not objs:
    449463            return objs
    450464        self._for_write = True
    class QuerySet:  
    455469        with transaction.atomic(using=self.db, savepoint=False):
    456470            objs_with_pk, objs_without_pk = partition(lambda o: o.pk is None, objs)
    457471            if objs_with_pk:
    458                 self._batched_insert(objs_with_pk, fields, batch_size)
     472                self._batched_insert(objs_with_pk, fields, batch_size, on_conflict=on_conflict)
    459473            if objs_without_pk:
    460474                fields = [f for f in fields if not isinstance(f, AutoField)]
    461                 ids = self._batched_insert(objs_without_pk, fields, batch_size)
     475                ids = self._batched_insert(objs_without_pk, fields, batch_size, on_conflict=on_conflict)
    462476                if connection.features.can_return_ids_from_bulk_insert:
    463477                    assert len(ids) == len(objs_without_pk)
    464478                for obj_without_pk, pk in zip(objs_without_pk, ids):
    class QuerySet:  
    466480                    obj_without_pk._state.adding = False
    467481                    obj_without_pk._state.db = self.db
    468482
     483        if (send_signal or all_ids) and connection.features.can_return_ids_from_bulk_insert:
     484            objs_with_pk, objs_without_pk = partition(lambda o: o.pk is None, objs_without_pk)
     485            if send_signal:
     486                for obj in objs_with_pk:
     487                    signals.post_save.send(sender=obj.__class__, instance=obj, created=True, using=self.db)
     488
     489            if all_ids and objs_without_pk and getattr(connection, 'pg_version', 0) >= 90500:
     490                all_ids = [] if all_ids is True else all_ids
      491                # f.attname in obj.__dict__ and f != obj._meta.pk mean the field is neither deferred nor the primary key
     492                obj0 = objs_without_pk[0]
     493                fields = [f.attname for f in obj0._meta.concrete_fields if f.attname
     494                          in obj0.__dict__ and f != obj0._meta.pk and f.attname not in all_ids]
     495                q = [Q(**{f.attname: getattr(obj, f.attname) for f in obj._meta.concrete_fields if f.attname in
     496                          obj.__dict__ and f != obj._meta.pk and f.attname not in all_ids}) for obj in objs_without_pk]
     497                if q:
     498                    output = self.filter(functools.reduce(Q.__or__, q)).values(*fields, obj0._meta.pk.attname)
     499                    for obj in objs_without_pk:
     500                        for o in output:
     501                            if all((getattr(obj, f).lower() == o[f].lower()) if isinstance(
     502                                    obj._meta.get_field(f), CIText) else (getattr(obj, f) == o[f]) for f in fields):
     503                                obj.pk = o[obj0._meta.pk.attname]
     504                                break
    469505        return objs
    470506
    471507    def get_or_create(self, defaults=None, **kwargs):
    class QuerySet:  
    11081144    # PRIVATE METHODS #
    11091145    ###################
    11101146
    1111     def _insert(self, objs, fields, return_id=False, raw=False, using=None):
     1147    def _insert(self, objs, fields, return_id=False, raw=False, using=None, on_conflict=None):
    11121148        """
    11131149        Insert a new record for the given model. This provides an interface to
    11141150        the InsertQuery class and is how Model.save() is implemented.
    class QuerySet:  
    11171153        if using is None:
    11181154            using = self.db
    11191155        query = sql.InsertQuery(self.model)
     1156        if on_conflict:
     1157            query.on_conflict = on_conflict.lower()
    11201158        query.insert_values(fields, objs, raw=raw)
    11211159        return query.get_compiler(using=using).execute_sql(return_id)
    11221160    _insert.alters_data = True
    11231161    _insert.queryset_only = False
    11241162
    1125     def _batched_insert(self, objs, fields, batch_size):
     1163    def _batched_insert(self, objs, fields, batch_size, on_conflict=None):
    11261164        """
    11271165        Helper method for bulk_create() to insert objs one batch at a time.
    11281166        """
    class QuerySet:  
    11311169        inserted_ids = []
    11321170        for item in [objs[i:i + batch_size] for i in range(0, len(objs), batch_size)]:
    11331171            if connections[self.db].features.can_return_ids_from_bulk_insert:
    1134                 inserted_id = self._insert(item, fields=fields, using=self.db, return_id=True)
     1172                inserted_id = self._insert(item, fields=fields, using=self.db, return_id=True, on_conflict=on_conflict)
    11351173                if isinstance(inserted_id, list):
    11361174                    inserted_ids.extend(inserted_id)
    11371175                else:
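
    The all_ids branch added to bulk_create() above matches the input objects back to rows that were already in the database by OR-ing one Q object per input object and then comparing field values. A standalone sketch of that matching idea, using a hypothetical Tag model with name and note fields (this mirrors the patched code but is not part of it):

      import functools

      from django.db.models import Q
      from myapp.models import Tag   # hypothetical

      objs = [Tag(name="red", note="warm"), Tag(name="blue", note="cold")]
      fields = ["name", "note"]       # the concrete, non-pk, non-deferred fields

      # One Q per object, OR-ed together, fetches all candidate rows in one query.
      q = functools.reduce(Q.__or__, [Q(**{f: getattr(o, f) for f in fields}) for o in objs])
      rows = Tag.objects.filter(q).values("pk", *fields)

      # Copy the pk back onto whichever in-memory instance matches every field.
      for obj in objs:
          for row in rows:
              if all(getattr(obj, f) == row[f] for f in fields):
                  obj.pk = row["pk"]
                  break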
  • docs/ref/models/querysets.txt

    diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
    index f2abf1c..08167d9 100644
    a b exists in the database, an :exc:`~django.db.IntegrityError` is raised.  
    19951995``bulk_create()``
    19961996~~~~~~~~~~~~~~~~~
    19971997
    1998 .. method:: bulk_create(objs, batch_size=None)
     1998.. method:: bulk_create(objs, batch_size=None, on_conflict=None, send_signal=True, all_ids=False)
    19991999
    20002000This method inserts the provided list of objects into the database in an
    20012001efficient manner (generally only 1 query, no matter how many objects there
    are)::  
    20092009This has a number of caveats though:
    20102010
    20112011* The model's ``save()`` method will not be called, and the ``pre_save`` and
    2012   ``post_save`` signals will not be sent.
      2012  ``post_save`` signals will not be sent by default (see the next point).
      2013* If PostgreSQL is used and ``send_signal`` is True, the ``post_save`` signal will be sent for newly created objects.
    20132014* It does not work with child models in a multi-table inheritance scenario.
    20142015* If the model's primary key is an :class:`~django.db.models.AutoField` it
    20152016  does not retrieve and set the primary key attribute, as ``save()`` does,
    The ``batch_size`` parameter controls how many objects are created in a single  
    20352036query. The default is to create all objects in one batch, except for SQLite
    20362037where the default is such that at most 999 variables per query are used.
    20372038
      2039If PostgreSQL >= 9.5 is used together with ``on_conflict='ignore'``, then, contrary to the statements above:
      2040
      2041* ``objs`` may contain both instances that already exist in the database
      2042  prior to the call and new instances.  The underlying PostgreSQL sequence
      2043  is still incremented for each object that was already in the database.
      2044* If ``all_ids`` is True, a second query is sent to the database which retrieves the IDs
      2045  of those objs that existed prior to calling ``bulk_create()``.  The query matches all
      2046  provided fields of the supplied objs.
      2047* If ``all_ids`` is a non-empty list or tuple, the fields mentioned in that list are
      2048  ignored in that second query when comparing objects for equality::
     2049
      2050    >>> from django.db import models
      2051    >>> from django.utils import timezone
      2052    >>> class T(models.Model):
      2053    ...    d = models.DateTimeField(default=timezone.now)
      2054    ...    n = models.IntegerField(unique=True)
     2055
     2056    >>> T.objects.bulk_create([T(n=1), T(n=1)], on_conflict='ignore', all_ids=True)
      2057    # Now the database contains one object with n=1, carrying the timestamp from the
      2058    # moment the first constructor was called.  The returned list has two objects,
      2059    # and the second object has no pk set.  The cause is that the second T(n=1) has a
      2060    # d timestamp different from that of the first T(n=1), so querying the database
      2061    # for the second T object returned no results.  Even though only the first object
      2062    # is inserted into the database, the corresponding PostgreSQL sequence is still
      2063    # increased by two.
     2064
     2065    >>> T.objects.bulk_create([T(n=1), T(n=1)], on_conflict='ignore', all_ids=['d'])
      2066    # Now the follow-up query matches objects with n=1 and ignores the d field.
      2067    # The pk field of each element in the list will be set.  The PostgreSQL
      2068    # sequence is still increased by two.
     2069
    20382070``count()``
    20392071~~~~~~~~~~~
    20402072