Django

Code

Ticket #17: 17.diff

File 17.diff, 19.5 kB (added by PhiR, 10 months ago)

Added tests and fixed style issues.

  • django/db/models/base.py

    old new  
    1515from django.utils.encoding import smart_str, force_unicode, smart_unicode 
    1616from django.conf import settings 
    1717from itertools import izip 
     18from weakref import WeakValueDictionary 
    1819import types 
    1920import sys 
    2021import os 
     
    7778        # registered version. 
    7879        return get_model(new_class._meta.app_label, name, False) 
    7980 
     81    def __call__(cls, *args, **kwargs): 
     82        """ 
     83        Create a model instance (by calling the default implementation) or return one 
     84        from the class-wide cache, inferring the pk value from args and kwargs. 
     85        If instance caching is enabled for this class, the cache is populated 
     86        whenever the pk value can be inferred. If 'meta__disable_caching' 
     87        is set to True in kwargs, the instance is always constructed and the 
     88        associated cache entry is flushed. 
     89        """ 
     90        def new_instance(): 
     91            return super(ModelBase, cls).__call__(*args, **kwargs) 
     92         
     93        cache_this_instance = cls.instance_caching_enabled() 
     94        # we always pop this setting from kwargs, the instance shouldn't see it 
     95        if kwargs.pop('meta__disable_caching', False): 
     96            # user explicitly requested not to use the cache; we flush the entry to prevent inconsistencies 
     97            cls._flush_cached_by_key(cls._get_cache_key(args, kwargs)) 
     98            cache_this_instance = False 
     99             
     100        # simplest case, just create a new instance every time  
     101        if not cache_this_instance: 
     102            return new_instance() 
     103         
     104        instance_key = cls._get_cache_key(args, kwargs) 
     105        # depending on the arguments, we might not be able to infer the PK, so in that case we create a new instance 
     106        if instance_key is None: 
     107            cls._instance_cache_nokey_misses += 1 
     108            return new_instance() 
     109 
     110        cached_instance = cls.get_cached_instance(instance_key)  # NOTE: on a hit the cached object is returned as-is; new_instance() is not called 
     111        if cached_instance is None: 
     112            cached_instance = new_instance() 
     113            cls.cache_instance(cached_instance) 
     114 
     115        return cached_instance 
     116 
    80117class Model(object): 
    81118    __metaclass__ = ModelBase 
    82119 
     
    97134    def __ne__(self, other): 
    98135        return not self.__eq__(other) 
    99136 
     137    def _get_cache_key(cls, args, kwargs): 
     138        """ 
     139        Used by the caching subsystem to infer the PK value from the constructor arguments (None when it cannot be inferred).  
     140        The result is used to decide if an instance has to be built or is already in the cache.  
     141        """ 
     142        result = None 
     143        pk = cls._meta.pk 
     144        # get the index of the pk in the class fields. this should be calculated *once*, but isn't at the moment 
     145        pk_position = cls._meta.fields.index(pk) 
     146        if len(args) > pk_position: 
     147            # if it's in the args, we can get it easily by index 
     148            result = args[pk_position] 
     149        elif pk.attname in kwargs: 
     150            # retrieve the pk value. Note that we use attname instead of name, to handle the case where the pk is  
     151            # a ForeignKey. 
     152            result = kwargs[pk.attname] 
     153        elif pk.name != pk.attname and pk.name in kwargs: 
     154            # ok we couldn't find the value, but maybe it's a FK and we can find the corresponding object instead 
     155            result = kwargs[pk.name] 
     156         
     157        if result is not None and isinstance(result, Model): 
     158            # if the pk value happens to be a model instance (which can happen with a FK), we'd rather use its own pk as the key 
     159            result = result._get_pk_val() 
     160        return result 
     161    _get_cache_key = classmethod(_get_cache_key) 
     162 
     163    def get_cached_instance(cls, id): 
     164        """ 
     165        Retrieve a cached instance by pk value, counting a hit or a miss in the stats. Returns  
     166        None when not found (which will always be the case when caching is disabled for this  
     167        class). Please note that the lookup will be done even when instance caching is disabled,  
     168        thus generating a miss in the stats. 
     169        """ 
     170        result = cls.__instance_cache__.get(id) 
     171        if result is None: 
     172            cls._instance_cache_misses += 1 
     173        else: 
     174            cls._instance_cache_hits += 1 
     175        return result 
     176    get_cached_instance = classmethod(get_cached_instance) 
     177 
     178    def cache_instance(cls, instance): 
     179        """ 
     180        Store an instance in the cache under its pk; does nothing when caching is disabled or the pk is None. TODO: add a store counter in the stats  
     181        """ 
     182        if cls.instance_caching_enabled() and instance._get_pk_val() is not None: 
     183            cls.__instance_cache__[instance._get_pk_val()] = instance 
     184    cache_instance = classmethod(cache_instance) 
     185 
     186    def _flush_cached_by_key(cls, key):  # drop the cache entry stored under key, if any 
     187        if cls.__instance_cache__.pop(key, None) is not None:  # pop() yields None when nothing was cached, so only real removals are counted 
     188            cls._instance_cache_flushes += 1 
     189    _flush_cached_by_key = classmethod(_flush_cached_by_key) 
     190         
     191    def flush_cached_instance(cls, instance): 
     192        """ 
     193        Flush an instance from the cache; nothing is done when caching is disabled for this class.  
     194        This is most likely called from delete(), and we want to make sure we don't cache dead objects. 
     195        We do not test the pk value because delete() does it and it will fail silently anyway.  
     196        """ 
     197        if cls.instance_caching_enabled(): 
     198            cls._flush_cached_by_key(instance._get_pk_val()) 
     199    flush_cached_instance = classmethod(flush_cached_instance) 
     200 
     201    def instance_caching_enabled(cls): 
     202        """ 
     203        Return the instance caching setting for this class. 
     204        """ 
     205        # cache is off by default: the flag is False until set_instance_caching() stores it  
     206        return getattr(cls, '_meta__instance_caching', False) 
     207    instance_caching_enabled = classmethod(instance_caching_enabled) 
     208 
     209    def set_instance_caching(cls, enable): 
     210        """ 
     211        Enable or disable instance caching for this class. The cache is flushed and the stats are reset only when 
     212        the setting actually changes (ie enabling an already-enabled cache will not flush).  
     213        """ 
     214        current_settings = cls.instance_caching_enabled() 
     215        cls._meta__instance_caching = enable 
     216        # completely flush the cache every time the setting changes value  
     217        if enable != current_settings: 
     218            cls.__instance_cache__.clear() 
     219            cls.instance_caching_stats_reset() 
     220    set_instance_caching = classmethod(set_instance_caching) 
     221     
     222    def instance_caching_stats_reset(cls): 
     223        # zero every stats counter; also used to init the stats in '_prepare()' 
     224        cls._instance_cache_hits = 0 
     225        cls._instance_cache_misses = 0 
     226        cls._instance_cache_nokey_misses = 0 
     227        cls._instance_cache_flushes = 0 
     228    instance_caching_stats_reset = classmethod(instance_caching_stats_reset) 
     229     
     230    def instance_caching_stats(cls):  # snapshot of the cache setting, counters and current cache size as a dict 
     231        return {'enabled': cls.instance_caching_enabled(),  
     232                'hits' : cls._instance_cache_hits,  
     233                'misses': cls._instance_cache_misses,  
     234                'flushes': cls._instance_cache_flushes,  
     235                'misses_nokey': cls._instance_cache_nokey_misses,  
     236                'cache_size': len(cls.__instance_cache__) } 
     237    instance_caching_stats = classmethod(instance_caching_stats) 
     238         
    100239    def __init__(self, *args, **kwargs): 
    101240        dispatcher.send(signal=signals.pre_init, sender=self.__class__, args=args, kwargs=kwargs) 
    102241 
     
    197336        if hasattr(cls, 'get_absolute_url'): 
    198337            cls.get_absolute_url = curry(get_absolute_url, opts, cls.get_absolute_url) 
    199338 
     339        cls.__instance_cache__ = WeakValueDictionary() 
     340        cls.instance_caching_stats_reset() 
     341        # enable the cache according to the class attribute 'meta__instance_caching' (off by default) 
     342        # FIXME better interface for setting this value (meta class attribute ?) 
     343        cls.set_instance_caching(getattr(cls, 'meta__instance_caching', False)) 
     344 
    200345        dispatcher.send(signal=signals.class_prepared, sender=cls) 
    201346 
    202347    _prepare = classmethod(_prepare) 
     
    261406                setattr(self, self._meta.pk.attname, connection.ops.last_insert_id(cursor, self._meta.db_table, self._meta.pk.column)) 
    262407        transaction.commit_unless_managed() 
    263408 
     409        # now that the row is written and the pk is set, store ourself in the instance cache (no-op when caching is disabled) 
     410        self.__class__.cache_instance(self) 
     411 
    264412        # Run any post-save hooks. 
    265413        dispatcher.send(signal=signals.post_save, sender=self.__class__, 
    266414                instance=self, created=(not record_exists)) 
     
    321469        seen_objs = SortedDict() 
    322470        self._collect_sub_objects(seen_objs) 
    323471 
     472        # remove ourself from the cache 
     473        self.__class__.flush_cached_instance(self) 
    324474        # Actually delete the objects 
    325475        delete_objects(seen_objs) 
    326476 
  • django/db/models/fields/related.py

    old new  
    159159        try: 
    160160            return getattr(instance, cache_name) 
    161161        except AttributeError: 
     162            related_cls = self.field.rel.to 
    162163            val = getattr(instance, self.field.attname) 
    163164            if val is None: 
    164165                # If NULL is an allowed value, return it. 
    165166                if self.field.null: 
    166167                    return None 
    167                 raise self.field.rel.to.DoesNotExist 
    168             other_field = self.field.rel.get_related_field() 
    169             if other_field.rel: 
    170                 params = {'%s__pk' % self.field.rel.field_name: val} 
     168                raise related_cls.DoesNotExist 
     169            # try to get a cached instance, and if that fails retrieve it from the db  
     170            # FIXME: test this. val is the value of the related field named by rel.field_name, which is only the object's pk when the FK targets the pk  
     171            if related_cls.instance_caching_enabled(): 
     172                rel_obj = related_cls.get_cached_instance(val) 
    171173            else: 
    172                 params = {'%s__exact' % self.field.rel.field_name: val} 
    173             rel_obj = self.field.rel.to._default_manager.get(**params) 
     174                rel_obj = None 
     175            if rel_obj is None: 
     176                other_field = self.field.rel.get_related_field() 
     177                if other_field.rel: 
     178                    params = {'%s__pk' % self.field.rel.field_name: val} 
     179                else: 
     180                    params = {'%s__exact' % self.field.rel.field_name: val} 
     181                rel_obj = related_cls._default_manager.get(**params) 
    174182            setattr(instance, cache_name, rel_obj) 
    175183            return rel_obj 
    176184 
  • django/db/models/query.py

    old new  
    11341134            dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance) 
    11351135 
    11361136        pk_list = [pk for pk,instance in seen_objs[cls]] 
     1137        # we wipe the cache now; it's *possible* some form of a __get__ lookup may reintroduce an item after 
     1138        # the fact with the same pk (extremely unlikely) 
     1139        for instance in seen_objs.values(): 
     1140            cls.flush_cached_instance(instance) 
     1141 
    11371142        for related in cls._meta.get_all_related_many_to_many_objects(): 
    11381143            if not isinstance(related.field, generic.GenericRelation): 
    11391144                for offset in range(0, len(pk_list), GET_ITERATOR_CHUNK_SIZE): 
     
    11671172    for cls in ordered_classes: 
    11681173        seen_objs[cls].reverse() 
    11691174        pk_list = [pk for pk,instance in seen_objs[cls]] 
     1175        for instance in seen_objs.values(): 
     1176            cls.flush_cached_instance(instance) 
    11701177        for offset in range(0, len(pk_list), GET_ITERATOR_CHUNK_SIZE): 
    11711178            cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \ 
    11721179                (qn(cls._meta.db_table), qn(cls._meta.pk.column), 
  • django/core/serializers/xml_serializer.py

    old new  
    176176                else: 
    177177                    value = field.to_python(getInnerText(field_node).strip()) 
    178178                data[field.name] = value 
    179  
     179        # disable caching, make sure the object is fully constructed from our data and not pulled from the cache 
     180        data["meta__disable_caching"] = True 
    180181        # Return a DeserializedObject so that the m2m data has a place to live. 
    181182        return base.DeserializedObject(Model(**data), m2m_data) 
    182183 
     
    234235        else: 
    235236           pass 
    236237    return u"".join(inner_text) 
    237  
  • django/core/serializers/python.py

    old new  
    8989            # Handle all other fields 
    9090            else: 
    9191                data[field.name] = field.to_python(field_value) 
    92  
     92        # disable caching, make sure the object is fully constructed from our data and not pulled from the cache 
     93        data["meta__disable_caching"] = True 
    9394        yield base.DeserializedObject(Model(**data), m2m_data) 
    9495 
    9596def _get_model(model_identifier): 
  • tests/modeltests/select_related/models.py

    old new  
    7575        obj.save() 
    7676        parent = obj 
    7777 
     78def set_instance_caching(settings):  # switch instance caching on/off for every model listed below (Species is not in the list) 
     79    for cls in [Domain, Kingdom, Phylum, Klass, Order, Family, Genus]: 
     80        cls.set_instance_caching(settings) 
    7881__test__ = {'API_TESTS':""" 
    7982 
    8083# Set up. 
     
    147150>>> len(db.connection.queries) 
    1481515 
    149152 
     153# CACHING TESTS 
     154>>> Genus.instance_caching_stats() 
     155{'hits': 0, 'misses_nokey': 0, 'enabled': False, 'flushes': 0, 'misses': 0, 'cache_size': 0} 
     156 
     157# ENABLE CACHING ON ALL MODELS IN THE TEST EXCEPT SPECIES 
     158>>> set_instance_caching(True) 
     159 
     160# This should be the same as without caching 
     161>>> db.reset_queries() 
     162>>> fly = Species.objects.get(name="melanogaster") 
     163>>> fly.genus.family.order.klass.phylum.kingdom.domain 
     164<Domain: Eukaryota> 
     165>>> len(db.connection.queries) 
     1668 
     167 
     168# This should be the same as without caching 
     169>>> db.reset_queries() 
     170>>> person = Species.objects.select_related().get(name="sapiens") 
     171>>> person.genus.family.order.klass.phylum.kingdom.domain 
     172<Domain: Eukaryota> 
     173>>> len(db.connection.queries) 
     1741 
     175 
     176# now let's see how caching helps 
     177>>> create_tree("Eukaryota Animalia Chordata Mammalia Primates Hominidae Homo sapiens_2") 
     178>>> create_tree("Eukaryota Animalia Chordata Mammalia Primates Hominidae Homo sapiens_3") 
     179>>> create_tree("Eukaryota Animalia Chordata Mammalia Primates Hominidae Homo sapiens_4") 
     180>>> create_tree("Eukaryota Animalia Chordata Mammalia Primates Hominidae Homo sapiens_5") 
     181>>> create_tree("Eukaryota Animalia Chordata Mammalia Primates Hominidae Homo sapiens_6") 
     182>>> set_instance_caching(False) 
     183>>> db.reset_queries() 
     184>>> world = Species.objects.all() 
     185>>> geni_of_world = [o.genus for o in world] 
     186>>> len(db.connection.queries) # 1 for Species and 9 for the Geni 
     18710 
     188>>> Genus.instance_caching_stats() 
     189{'hits': 0, 'misses_nokey': 0, 'enabled': False, 'flushes': 0, 'misses': 0, 'cache_size': 0} 
     190 
     191>>> set_instance_caching(True) 
     192>>> db.reset_queries() 
     193>>> world = Species.objects.all() 
     194>>> geni_of_world == [o.genus for o in world] 
     195True 
     196>>> len(db.connection.queries) # 1 for Species and 4 for the distinct Geni 
     1975 
     198 
     199# here we get 8 misses because ReverseSingleRelatedObjectDescriptor misses twice when the object isn't in the cache 
     200>>> Genus.instance_caching_stats() # 4 distinct Geni and 5 rows generating hits.  
     201{'hits': 5, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 8, 'cache_size': 4} 
     202>>> Genus.get_cached_instance(2) 
     203<Genus: Homo> 
     204>>> Genus.instance_caching_stats()['hits'] # one more hit ! 
     2056 
     206>>> Genus.instance_caching_stats_reset() 
     207>>> Genus.instance_caching_stats() 
     208{'hits': 0, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 0, 'cache_size': 4} 
     209 
     210>>> Genus.instance_caching_stats_reset() 
     211>>> Genus.get_cached_instance(2) 
     212<Genus: Homo> 
     213>>> Genus.instance_caching_stats() 
     214{'hits': 1, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 0, 'cache_size': 4} 
     215>>> Genus.objects.get(id=2) 
     216<Genus: Homo> 
     217>>> Genus.instance_caching_stats() 
     218{'hits': 2, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 0, 'cache_size': 4} 
     219>>> Species.objects.get(id=2).genus 
     220<Genus: Homo> 
     221>>> Genus.instance_caching_stats() 
     222{'hits': 3, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 0, 'cache_size': 4} 
     223 
     224>>> set_instance_caching(False) 
     225>>> set_instance_caching(True) 
     226>>> Genus.get_cached_instance(2) 
     227>>> Genus.objects.get(id=2) 
     228<Genus: Homo> 
     229>>> Genus.get_cached_instance(2) 
     230<Genus: Homo> 
     231>>> Genus.instance_caching_stats_reset() 
     232>>> db.reset_queries() 
     233>>> world = Species.objects.all() 
     234>>> sapiens = world[1] 
     235>>> len(db.connection.queries) # 1 for Species and the rest is in the cache, whoa 
     2361 
     237>>> Genus.instance_caching_stats() # we haven't touched geni yet  
     238{'hits': 0, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 0, 'cache_size': 0} 
     239>>> homo = sapiens.genus 
     240>>> Genus.instance_caching_stats() # 2 misses from ReverseSingleRelatedObjectDescriptor even if only one object was retrieved 
     241{'hits': 0, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 2, 'cache_size': 1} 
     242 
     243>>> set_instance_caching(False) 
     244>>> Genus.get_cached_instance(2) 
     245 
     246# This one is tricky, we get() the instance so it gets cached,  
     247# then test that instantiating with the same PK retrieves the instance 
     248>>> set_instance_caching(True) 
     249>>> Genus.instance_caching_enabled() 
     250True 
     251>>> Species.instance_caching_enabled() 
     252False 
     253>>> first_homo = Genus.objects.get(id=2) 
     254>>> first_homo 
     255<Genus: Homo> 
     256>>> Genus.instance_caching_stats()['cache_size'] 
     2571 
     258>>> Genus.instance_caching_stats()['hits'] 
     2590 
     260>>> homo = Genus.get_cached_instance(2) 
     261>>> homo 
     262<Genus: Homo> 
     263>>> Genus.instance_caching_stats()['hits'] 
     2641 
     265>>> Genus.instance_caching_stats_reset() 
     266>>> kwargs = {'id': 2} 
     267>>> Genus._get_cache_key([], kwargs) 
     2682 
     269>>> Genus(id = 2) 
     270<Genus: Homo> 
     271>>> Genus.instance_caching_stats()['hits'] 
     2721 
     273>>> Genus.flush_cached_instance(homo) 
     274>>> Genus.get_cached_instance(2) == None 
     275True 
     276>>> Genus.instance_caching_stats()['cache_size'] 
     2770 
     278>>> Genus.instance_caching_stats_reset() 
     279>>> first_homo = Genus.objects.get(id=2) 
     280>>> Genus.instance_caching_stats()['misses'] 
     2811 
     282 
     283## each of the initial species has its own genus but the 5 sapiens dupes will hit the cache 
     284#>>> Genus.instance_caching_stats() 
     285#{'hits': 5, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 4, 'cache_size': 4} 
     286# 
     287#>>> set_instance_caching(False) # Flushes the cache 
     288#>>> set_instance_caching(True) 
     289#>>> Genus.instance_caching_stats_reset() 
     290#>>> temp = [o.genus for o in (list(Species.objects.all()) + list(Species.objects.all()))] 
     291#>>> Genus.instance_caching_stats() 
     292#{'hits': 5, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 0, 'cache_size': 4} 
     293# 
     294#>>> set_instance_caching(False) # Flushes the cache 
     295#>>> set_instance_caching(True) 
     296#>>> db.reset_queries() 
     297#>>> Genus.instance_caching_stats_reset() 
     298#>>> world = Species.objects.all().select_related() 
     299#>>> [o.genus for o in world] 
     300#[<Genus: Drosophila>, <Genus: Homo>, <Genus: Pisum>, <Genus: Amanita>, <Genus: Homo>, <Genus: Homo>, <Genus: Homo>, <Genus: Homo>, <Genus: Homo>] 
     301#>>> len(db.connection.queries) 
     302#1 
     303#>>> Genus.instance_caching_stats() 
     304#{'hits': 5, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 4, 'cache_size': 4} 
     305 
    150306# Reset DEBUG to where we found it. 
    151307>>> settings.DEBUG = False 
    152308"""}