Code

Ticket #17: 5737_instance_caching.diff

File 5737_instance_caching.diff, 8.6 KB (added by Brian Rosner <brosner@…>, 7 years ago)

updated patch to #5737 with some minor edits

Line 
1Index: django/db/models/base.py
2===================================================================
3--- django/db/models/base.py    (revision 5737)
4+++ django/db/models/base.py    (working copy)
5@@ -15,6 +15,7 @@
6 from django.utils.encoding import smart_str, force_unicode
7 from django.conf import settings
8 from itertools import izip
9+from weakref import WeakValueDictionary
10 import types
11 import sys
12 import os
13@@ -76,6 +77,21 @@
14         # should only be one class for each model, so we must always return the
15         # registered version.
16         return get_model(new_class._meta.app_label, name, False)
17+   
18+    def __call__(cls, *args, **kwargs):
19+        if not kwargs.pop("disable_instance_cache", False) \
20+            and cls._meta.has_auto_field:
21+            key = cls._get_cache_key(args, kwargs)
22+            if key is not None:
23+                obj = cls.__instance_cache__.get(key)
24+                if obj is None:
25+                    obj = super(ModelBase, cls).__call__(*args, **kwargs)
26+                    cls.__instance_cache__[key] = obj
27+            else:
28+                obj = super(ModelBase, cls).__call__(*args, **kwargs)
29+        else:
30+            obj = super(ModelBase, cls).__call__(*args, **kwargs)
31+        return obj
32 
33 class Model(object):
34     __metaclass__ = ModelBase
35@@ -96,6 +112,23 @@
36 
37     def __ne__(self, other):
38         return not self.__eq__(other)
39+   
40+    def _get_cache_key(cls, args, kwargs):
41+        # TODO: pk_index should be computed once per class; it is currently recomputed on every call
42+        pk_index = cls._meta.fields.index(cls._meta.pk)
43+        if len(args) > pk_index:
44+            return args[pk_index]
45+        pk = cls._meta.pk
46+        if pk.name in kwargs:
47+            return kwargs[pk.name]
48+        elif pk.attname in kwargs:
49+            return kwargs[pk.attname]
50+        return None
51+    _get_cache_key = classmethod(_get_cache_key)
52+   
53+    def get_cached_instance(cls, key):
54+        return cls.__instance_cache__.get(key)
55+    get_cached_instance = classmethod(get_cached_instance)
56 
57     def __init__(self, *args, **kwargs):
58         dispatcher.send(signal=signals.pre_init, sender=self.__class__, args=args, kwargs=kwargs)
59@@ -196,6 +229,8 @@
60 
61         if hasattr(cls, 'get_absolute_url'):
62             cls.get_absolute_url = curry(get_absolute_url, opts, cls.get_absolute_url)
63+       
64+        cls.__instance_cache__ = WeakValueDictionary()
65 
66         dispatcher.send(signal=signals.class_prepared, sender=cls)
67 
68@@ -254,6 +289,10 @@
69             if self._meta.has_auto_field and not pk_set:
70                 setattr(self, self._meta.pk.attname, backend.get_last_insert_id(cursor, self._meta.db_table, self._meta.pk.column))
71         transaction.commit_unless_managed()
72+       
73+        # write this instance into the cache under its pk (replaces any existing entry)
74+        if self._meta.has_auto_field:
75+            self.__instance_cache__[self._get_pk_val()] = self
76 
77         # Run any post-save hooks.
78         dispatcher.send(signal=signals.post_save, sender=self.__class__, instance=self)
79@@ -315,6 +354,10 @@
80         self._collect_sub_objects(seen_objs)
81 
82         # Actually delete the objects
83+        if self._meta.has_auto_field:
84+            pk = self._get_pk_val()
85+            if pk is not None:
86+                self.__instance_cache__.pop(pk, None)
87         delete_objects(seen_objs)
88 
89     delete.alters_data = True
90Index: django/db/models/fields/related.py
91===================================================================
92--- django/db/models/fields/related.py  (revision 5737)
93+++ django/db/models/fields/related.py  (working copy)
94@@ -165,12 +165,14 @@
95                 if self.field.null:
96                     return None
97                 raise self.field.rel.to.DoesNotExist
98-            other_field = self.field.rel.get_related_field()
99-            if other_field.rel:
100-                params = {'%s__pk' % self.field.rel.field_name: val}
101-            else:
102-                params = {'%s__exact' % self.field.rel.field_name: val}
103-            rel_obj = self.field.rel.to._default_manager.get(**params)
104+            rel_obj = self.field.rel.to.get_cached_instance(val)
105+            if rel_obj is None:
106+                other_field = self.field.rel.get_related_field()
107+                if other_field.rel:
108+                    params = {'%s__pk' % self.field.rel.field_name: val}
109+                else:
110+                    params = {'%s__exact' % self.field.rel.field_name: val}
111+                rel_obj = self.field.rel.to._default_manager.get(**params)
112             setattr(instance, cache_name, rel_obj)
113             return rel_obj
114 
115Index: django/db/models/query.py
116===================================================================
117--- django/db/models/query.py   (revision 5737)
118+++ django/db/models/query.py   (working copy)
119@@ -1109,12 +1109,19 @@
120     for cls in ordered_classes:
121         seen_objs[cls] = seen_objs[cls].items()
122         seen_objs[cls].sort()
123+        clean_instance_cache = cls.__instance_cache__.pop
124 
125         # Pre notify all instances to be deleted
126         for pk_val, instance in seen_objs[cls]:
127             dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance)
128 
129         pk_list = [pk for pk,instance in seen_objs[cls]]
130+        # Evict these pks from the instance cache now. It is *possible* that
131+        # some form of __get__ lookup reintroduces an instance with the same
132+        # pk afterwards, but that is extremely unlikely.
133+        for x in pk_list:
134+            clean_instance_cache(x, None)
135+           
136         for related in cls._meta.get_all_related_many_to_many_objects():
137             if not isinstance(related.field, generic.GenericRelation):
138                 for offset in range(0, len(pk_list), GET_ITERATOR_CHUNK_SIZE):
139@@ -1148,6 +1155,8 @@
140     for cls in ordered_classes:
141         seen_objs[cls].reverse()
142         pk_list = [pk for pk,instance in seen_objs[cls]]
143+        for x in pk_list:
144+            clean_instance_cache(x, None)
145         for offset in range(0, len(pk_list), GET_ITERATOR_CHUNK_SIZE):
146             cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
147                 (qn(cls._meta.db_table), qn(cls._meta.pk.column),
148Index: django/core/serializers/xml_serializer.py
149===================================================================
150--- django/core/serializers/xml_serializer.py   (revision 5737)
151+++ django/core/serializers/xml_serializer.py   (working copy)
152@@ -177,6 +177,7 @@
153                     value = field.to_python(getInnerText(field_node).strip())
154                 data[field.name] = value
155 
156+        data["disable_instance_cache"] = True
157         # Return a DeserializedObject so that the m2m data has a place to live.
158         return base.DeserializedObject(Model(**data), m2m_data)
159 
160Index: django/core/serializers/python.py
161===================================================================
162--- django/core/serializers/python.py   (revision 5737)
163+++ django/core/serializers/python.py   (working copy)
164@@ -88,7 +88,8 @@
165             # Handle all other fields
166             else:
167                 data[field.name] = field.to_python(field_value)
168-
169+       
170+        data["disable_instance_cache"] = True
171         yield base.DeserializedObject(Model(**data), m2m_data)
172 
173 def _get_model(model_identifier):
174Index: tests/modeltests/select_related/models.py
175===================================================================
176--- tests/modeltests/select_related/models.py   (revision 5737)
177+++ tests/modeltests/select_related/models.py   (working copy)
178@@ -107,13 +107,13 @@
179 1
180 
181 # select_related() also of course applies to entire lists, not just items.
182-# Without select_related()
183+# Without select_related() (note instance caching still reduces this from 9 to 5)
184 >>> db.reset_queries()
185 >>> world = Species.objects.all()
186 >>> [o.genus.family for o in world]
187 [<Family: Drosophilidae>, <Family: Hominidae>, <Family: Fabaceae>, <Family: Amanitacae>]
188 >>> len(db.connection.queries)
189-9
190+5
191 
192 # With select_related():
193 >>> db.reset_queries()
194@@ -129,23 +129,23 @@
195 >>> pea.genus.family.order.klass.phylum.kingdom.domain
196 <Domain: Eukaryota>
197 
198-# Notice: one few query than above because of depth=1
199+# Notice: with instance caching this takes 1 query; it would be 7 without it.
200 >>> len(db.connection.queries)
201-7
202+1
203 
204 >>> db.reset_queries()
205 >>> pea = Species.objects.select_related(depth=5).get(name="sativum")
206 >>> pea.genus.family.order.klass.phylum.kingdom.domain
207 <Domain: Eukaryota>
208 >>> len(db.connection.queries)
209-3
210+1
211 
212 >>> db.reset_queries()
213 >>> world = Species.objects.all().select_related(depth=2)
214 >>> [o.genus.family.order for o in world]
215 [<Order: Diptera>, <Order: Primates>, <Order: Fabales>, <Order: Agaricales>]
216 >>> len(db.connection.queries)
217-5
218+1
219 
220 # Reset DEBUG to where we found it.
221 >>> settings.DEBUG = False