Ticket #580: cache-vary.diff

File cache-vary.diff, 26.2 KB (added by hugo, 10 years ago)

new cache patch based on Vary header (with rework of middlewares)

  • django/utils/cache.py

     
     1"""
     2This module contains helper functions and decorators
     3for controlling caching middleware and external caches.
     4It does so by managing the Vary header of responses.
     5Included are functions to patch the header of response
     6objects directly and decorators that change functions
     7to do that header patching on return.
     8
     9The Vary header of the response defines what headers a
     10cache has to take into account when building its cache
     11key. Requests with the same path but different header
     12content for headers named in Vary need to get different
     13cache keys to prevent delivery of wrong content.
     14
     15A sample would be some i18n middleware that needs to
     16distinguish caches by the Accept-language header to
     17prevent delivering wrong language content to users just
     18because it happens to stay in the cache.
     19"""
     20
     21import re
     22import md5
     23import datetime
     24
     25from django.conf import settings
     26from django.core.cache import cache
     27
     28vary_delim_re = re.compile(r',\s*')
     29
     30def patch_response_headers(response, cache_timeout=None):
     31    """
     32    This function patches several response headers to match
     33    cache settings. Not all are cache related, but all are
     34    related to traffic reducing activity and cache controlling.
     35
     36    The cache timeout is measured in seconds and usually taken
     37    from CACHE_MIDDLEWARE_SECONDS, but can be overridden per
     38    invocation.
     39    """
     40
     41    if cache_timeout is None:
     42        cache_timeout = settings.CACHE_MIDDLEWARE_SECONDS
     43
     44    now = datetime.datetime.utcnow()
     45    expires = now + datetime.timedelta(0, cache_timeout)
     46
     47    if not response.has_header('ETag'):
     48        response['ETag'] = md5.new(response.content).hexdigest()
     49    if not response.has_header('Last-Modified'):
     50        response['Last-Modified'] = now.strftime('%a, %d %b %Y %H:%M:%S GMT')
     51    if not response.has_header('Expires'):
     52        response['Expires'] = expires.strftime('%a, %d %b %Y %H:%M:%S GMT')
     53    if not response.has_header('Cache-Control'):
     54        response['Cache-Control'] = 'max-age=%d' % cache_timeout
     55
     56def patch_vary_headers(response, newheaders):
     57    """
     58    This helper patches a header into the Vary header
     59    of the response. It takes a response object and
     60    a list of headers that should be added to the Vary
     61    header. It adds only those headers that are not
     62    already in there.
     63    """
     64    vary = []
     65    if response.has_header('Vary'):
     66        vary = vary_delim_re.split(response['Vary'])
     67    d = dict([(el.lower(), 1) for el in vary])
     68    for newheader in newheaders:
     69        if not newheader.lower() in d:
     70            vary.append(newheader)
     71    response['Vary'] = ', '.join(vary)
     72
     73def vary_on_headers(*headers):
     74    """
     75    This is a decorator that manipulates a response to
     76    take headers into account for caching. It does so by adding
     77    those headers to the Vary header of the response.
     78
     79    Usage is like this::
     80
     81       @vary_on_headers('Cookie', 'Accept-language')
     82       def index(request):
     83           ...
     84           return HttpResponse(html, context)
     85    """
     86
     87    def decorator(func):
     88   
     89        def inner_func(*args, **kw):
     90            resp = func(*args, **kw)
     91            patch_vary_headers(resp, headers)
     92            return resp
     93
     94        return inner_func
     95   
     96    return decorator
     97
     98def vary_on_cookie(func):
     99    """
     100    This is a decorator that will add the Cookie header
     101    to the Vary header of a response to designate that a
     102    page's content depends on cookies.
     103
     104    Usage is like this::
     105
     106        @vary_on_cookie
     107        def index(request):
     108            ...
     109            return HttpResponse(html, context)
     110    """
     111   
     112    def inner_func(*args, **kw):
     113        resp = func(*args, **kw)
     114        patch_vary_headers(resp, ['Cookie'])
     115        return resp
     116
     117    return inner_func
     118
     119def _generate_cache_key(request, headerlist, key_prefix):
     120    """
     121    This helper function pulls out the headers given
     122    in the header list and constructs a cache key from
     123    those.
     124    """
     125    ctx = md5.new()
     126    for header in headerlist:
     127        value = request.META.get(header, None)
     128        if value is not None:
     129            ctx.update(value)
     130    return 'views.decorators.cache.cache_page.%s.%s.%s' % (
     131        key_prefix, request.path, ctx.hexdigest())
     132
     133def get_cache_key(request, key_prefix=None):
     134    """
     135    This function returns a cache key based on the request path.
     136    It can be used in the request phase because it pulls the
     137    list of headers to take into account from the global path
     138    registry and uses those to build a cache key to check
     139    against.
     140
     141    If there is no headerlist stored, the page needs to be
     142    rebuilt and so this function returns None.
     143    """
     144
     145    if key_prefix is None:
     146        key_prefix = settings.CACHE_MIDDLEWARE_KEY_PREFIX
     147
     148    cache_key = 'views.decorators.cache.cache_header.%s.%s' % (
     149        key_prefix, request.path)
     150    headerlist = cache.get(cache_key, None)
     151    if headerlist is not None:
     152        return _generate_cache_key(request, headerlist, key_prefix)
     153    else:
     154        return None
     155
     156def learn_cache_key(request, response, cache_timeout=None, key_prefix=None):
     157    """
     158    This function learns what headers to take into account
     159    for some request path from the response object. It stores
     160    those headers in a global path registry so that later access
     161    to that path will know what headers to take into account without
     162    building the response object itself - the headers are named in
     163    the Vary header of the response, but we want to prevent response
     164    generation.
     165
     166    The list of headers to use for cache key generation is stored
     167    in the same cache as the pages themselves. If the cache ages
     168    some data out of the cache, this just means that we have to
     169    build the response once to get at the Vary header and so at
     170    the list of headers to use for the cache key.
     171    """
     172
     173    if key_prefix is None:
     174        key_prefix = settings.CACHE_MIDDLEWARE_KEY_PREFIX
     175
     176    if cache_timeout is None:
     177        cache_timeout = settings.CACHE_MIDDLEWARE_SECONDS
     178
     179    cache_key = 'views.decorators.cache.cache_header.%s.%s' % (
     180        key_prefix, request.path)
     181
     182    if response.has_header('Vary'):
     183        headerlist = ['HTTP_'+header.upper().replace('-', '_') for header in vary_delim_re.split(response['Vary'])]
     184        cache.set(cache_key, headerlist, cache_timeout)
     185        return _generate_cache_key(request, headerlist, key_prefix)
     186    else:
     187        # if there is no Vary header, we still need a cache key
     188        # for the request.path
     189        cache.set(cache_key, [], cache_timeout)
     190        return _generate_cache_key(request, [], key_prefix)
     191
  • django/utils/decorators.py

     
     1"""
     2This module has functions that help with
     3creating decorators for views.
     4"""
     5
     6def decorator_from_middleware(middleware_class):
     7    """
     8    This function turns any middleware that implements either
     9    process_request or process_response (or both) into a decorator
     10    so that you can use it on a per-view basis.
     11    """
     12
     13    def _decorator_from_middleware(view_func, *args, **kwargs):
     14   
     15        middleware = middleware_class(*args, **kwargs)
     16
     17        def _wrapped_view(request, *args, **kwargs):
     18            if hasattr(middleware, 'process_request'):
     19                result = middleware.process_request(request)
     20                if result is not None:
     21                    return result
     22            response = view_func(request, *args, **kwargs)
     23            if hasattr(middleware, 'process_response'):
     24                result = middleware.process_response(request, response)
     25                if result is not None:
     26                    return result
     27            return response
     28
     29        return _wrapped_view
     30
     31    return _decorator_from_middleware
  • django/middleware/gzip.py

     
     1import re
     2from django.utils.text import compress_string
     3from django.utils.cache import patch_vary_headers
     4
     5re_accepts_gzip = re.compile(r'\bgzip\b')
     6
     7class GZipMiddleware:
     8    """
     9    This middleware just compresses content if the browser
     10    allows compression to take place. It sets the Vary
     11    header accordingly so that other caches will base
     12    their storage on the Accept-Encoding header.
     13    """
     14   
     15    def process_response(self, request, response):
     16        patch_vary_headers(response, ['Accept-Encoding'])
     17
     18        if response.has_header('Content-Encoding'):
     19            return response
     20
     21        ae = request.META.get('HTTP_ACCEPT_ENCODING', '')
     22        if not re_accepts_gzip.search(ae):
     23            return response
     24
     25        response.content = compress_string(response.content)
     26        response['Content-Encoding'] = 'gzip'
     27
     28        return response
  • django/middleware/http.py

     
     1import datetime
     2
     3class HTTPMiddleware:
     4    """HTTP middleware:
     5
     6    Handles conditional GET operations: if the response has an ETag or
     7    Last-Modified header, and the request has a matching If-None-Match or
     8    If-Modified-Since header, the response is turned into an empty
     9    304 (Not Modified) response.
     10
     11    Removes the content from any response to a HEAD request.
     12
     13    Sets the Date and Content-Length response-headers.
     14    """
     15   
     16    def process_response(self, request, response):
     17        now = datetime.datetime.utcnow()
     18        response['Date'] = now.strftime('%a, %d %b %Y %H:%M:%S GMT')
     19        if not response.has_header('Content-Length'):
     20            response['Content-Length'] = str(len(response.content))
     21   
     22        if response.has_header('ETag'):
     23            if_none_match = request.META.get('HTTP_IF_NONE_MATCH', None)
     24            if if_none_match == response['ETag']:
     25                response.status_code = 304
     26                response.content = ''
     27                response['Content-Length'] = '0'
     28
     29        if response.has_header('Last-Modified'):
     30            last_mod = response['Last-Modified']
     31            if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE', None)
     32            if if_modified_since == response['Last-Modified']:
     33                response.status_code = 304
     34                response.content = ''
     35                response['Content-Length'] = '0'
     36
     37        if request.META['REQUEST_METHOD'] == 'HEAD':
     38            response.content = ''
     39
     40        return response
  • django/middleware/cache.py

     
     1"""A Vary-capable cache middleware. Adapted from Django's cache
     2middleware."""
     3
    14from django.conf import settings
    25from django.core.cache import cache
     6from django.utils.cache import get_cache_key, learn_cache_key, patch_vary_headers, patch_response_headers
    37from django.utils.httpwrappers import HttpResponseNotModified
    4 from django.utils.text import compress_string
    5 import datetime, md5
    68
     9import datetime
     10import copy
     11import md5
     12
    713class CacheMiddleware:
    814    """
    915    Cache middleware. If this is enabled, each Django-powered page will be
    10     cached for CACHE_MIDDLEWARE_SECONDS seconds. Cache is based on URLs. Pages
    11     with GET or POST parameters are not cached.
     16    cached for CACHE_MIDDLEWARE_SECONDS seconds. Cache is based on URLs.
    1217
    13     If the cache is shared across multiple sites using the same Django
    14     installation, set the CACHE_MIDDLEWARE_KEY_PREFIX to the name of the site,
    15     or some other string that is unique to this Django instance, to prevent key
    16     collisions.
     18    Only parameter-less GET or HEAD-requests are cached.
    1719
    18     This middleware will also make the following optimizations:
     20    Only responses with status-code 200 will be cached.
    1921
    20     * If the CACHE_MIDDLEWARE_GZIP setting is True, the content will be
    21       gzipped.
     22    This middleware expects that a HEAD request is answered with a
     23    response exactly like the corresponding GET request.
    2224
    23     * ETags will be added, using a simple MD5 hash of the page's content.
     25    When a hit occurs, a shallow copy of the original response object is
     26    returned from process_request.
     27
     28    Pages will be cached based on the contents of the request headers
     29    listed in the response Vary-header [ FIXME -- need example, or better
     30    description? ]. This means that pages cannot change their Vary-header,
     31    without strange results.
     32
     33    Also, this middleware sets ETag, Last-Modified, Expires and
     34    Cache-Control headers on the response object.
    2435    """
     36
     37    def __init__(self, cache_timeout=None, key_prefix=None):
     38        self.cache_timeout = cache_timeout
     39        if cache_timeout is None:
     40            self.cache_timeout = settings.CACHE_MIDDLEWARE_SECONDS
     41        self.key_prefix = key_prefix
     42        if key_prefix is None:
     43            self.key_prefix = settings.CACHE_MIDDLEWARE_KEY_PREFIX
     44
    2545    def process_request(self, request):
    26         """
    27         Checks whether the page is already cached. If it is, returns the cached
    28         version. Also handles ETag stuff.
    29         """
    30         if request.GET or request.POST:
    31             request._cache_middleware_set_cache = False
     46        """Checks whether the page is already cached. If it is, returns
     47        the cached version."""
     48
     49        method = request.META['REQUEST_METHOD']
     50        if not method in ('GET', 'HEAD') or request.GET:
     51            request._cache_update_cache = False
    3252            return None # Don't bother checking the cache.
    3353
    34         accept_encoding = ''
    35         if settings.CACHE_MIDDLEWARE_GZIP:
    36             try:
    37                 accept_encoding = request.META['HTTP_ACCEPT_ENCODING']
    38             except KeyError:
    39                 pass
    40         accepts_gzip = 'gzip' in accept_encoding
    41         request._cache_middleware_accepts_gzip = accepts_gzip
     54        cache_key = get_cache_key(request, self.key_prefix)
     55        if cache_key is None:
     56            request._cache_update_cache = True
     57            return None # No cache information available, need to rebuild
    4258
    43         # This uses the same cache_key as views.decorators.cache.cache_page,
    44         # so the cache can be shared.
    45         cache_key = 'views.decorators.cache.cache_page.%s.%s.%s' % \
    46             (settings.CACHE_MIDDLEWARE_KEY_PREFIX, request.path, accepts_gzip)
    47         request._cache_middleware_key = cache_key
    48 
    4959        response = cache.get(cache_key, None)
    5060        if response is None:
    51             request._cache_middleware_set_cache = True
    52             return None
    53         else:
    54             request._cache_middleware_set_cache = False
    55             # Logic is from http://simon.incutio.com/archive/2003/04/23/conditionalGet
    56             try:
    57                 if_none_match = request.META['HTTP_IF_NONE_MATCH']
    58             except KeyError:
    59                 if_none_match = None
    60             try:
    61                 if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
    62             except KeyError:
    63                 if_modified_since = None
    64             if if_none_match is None and if_modified_since is None:
    65                 pass
    66             elif if_none_match is not None and response['ETag'] != if_none_match:
    67                 pass
    68             elif if_modified_since is not None and response['Last-Modified'] != if_modified_since:
    69                 pass
    70             else:
    71                 return HttpResponseNotModified()
    72         return response
     61            request._cache_update_cache = True
     62            return None # No cache information available, need to rebuild
    7363
     64        request._cache_update_cache = False
     65        return copy.copy(response)
     66
    7467    def process_response(self, request, response):
    75         """
    76         Sets the cache, if needed.
    77         """
    78         if request._cache_middleware_set_cache:
    79             content = response.get_content_as_string(settings.DEFAULT_CHARSET)
    80             if request._cache_middleware_accepts_gzip:
    81                 content = compress_string(content)
    82                 response.content = content
    83                 response['Content-Encoding'] = 'gzip'
    84             response['ETag'] = md5.new(content).hexdigest()
    85             response['Content-Length'] = '%d' % len(content)
    86             response['Last-Modified'] = datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
    87             cache.set(request._cache_middleware_key, response, settings.CACHE_MIDDLEWARE_SECONDS)
     68        """Sets the cache, if needed."""
     69
     70        if not request._cache_update_cache:
     71            # we don't need to update the cache, just return
     72            return response
     73        if not request.META['REQUEST_METHOD'] == 'GET':
     74            # This is a stronger requirement than above. It is needed
     75            # because of interactions between this middleware and the
     76            # HTTPMiddleware, which throws the body of a HEAD-request
     77            # away before this middleware gets a chance to cache it.
     78            return response
     79        if not response.status_code == 200:
     80            return response
     81
     82        patch_response_headers(response, self.cache_timeout)
     83
     84        cache_key = learn_cache_key(request, response, self.cache_timeout, self.key_prefix)
     85        cache.set(cache_key, response, self.cache_timeout)
     86
    8887        return response
  • django/middleware/sessions.py

     
    11from django.conf.settings import SESSION_COOKIE_NAME, SESSION_COOKIE_AGE, SESSION_COOKIE_DOMAIN
    22from django.models.core import sessions
     3from django.utils.cache import patch_vary_headers
    34import datetime
    45
    56TEST_COOKIE_NAME = 'testcookie'
     
    6162    def process_response(self, request, response):
    6263        # If request.session was modified, or if response.session was set, save
    6364        # those changes and set a session cookie.
     65        patch_vary_headers(response, ['Cookie'])
    6466        try:
    6567            modified = request.session.modified
    6668        except AttributeError:
  • django/views/decorators/gzip.py

     
     1"""
     2Decorator for views that compresses pages if the client supports it.
     3Additionally it patches the headers to match the now compressed
     4content and adds the Accept-Encoding header to the Vary header to
     5give caches a hint on what to take into account for cache keys.
     6"""
     7from django.utils.decorators import decorator_from_middleware
     8from django.middleware.gzip import GZipMiddleware
     9
     10gzip_page = decorator_from_middleware(GZipMiddleware)
     11
  • django/views/decorators/http.py

     
     1"""
     2Decorator for views that supports conditional get on ETag and
     3Last-Modified headers.
     4"""
     5
     6from django.utils.decorators import decorator_from_middleware
     7from django.middleware.http import HTTPMiddleware
     8
     9conditional_page = decorator_from_middleware(HTTPMiddleware)
     10
  • django/views/decorators/cache.py

     
    1 from django.core.cache import cache
    2 from django.utils.httpwrappers import HttpResponseNotModified
    3 from django.utils.text import compress_string
    4 from django.conf.settings import DEFAULT_CHARSET
    5 import datetime, md5
     1"""
     2Decorator for views that tries getting the page from the cache and
     3populates the cache if the page isn't in the cache yet.
    64
    7 def cache_page(view_func, cache_timeout, key_prefix=''):
    8     """
    9     Decorator for views that tries getting the page from the cache and
    10     populates the cache if the page isn't in the cache yet. Also takes care
    11     of ETags and gzips the page if the client supports it.
     5The cache is keyed by the URL and some data from the headers. Additionally
     6there is the key prefix that is used to distinguish different cache areas
     7in a multi-site setup. You could use the sites.get_current().domain, for
     8example, as that is unique across a django project.
    129
    13     The cache is keyed off of the page's URL plus the optional key_prefix
    14     variable. Use key_prefix if your Django setup has multiple sites that
    15     use cache; otherwise the cache for one site would affect the other. A good
    16     example of key_prefix is to use sites.get_current().domain, because that's
    17     unique across all Django instances on a particular server.
    18     """
    19     def _check_cache(request, *args, **kwargs):
    20         try:
    21             accept_encoding = request.META['HTTP_ACCEPT_ENCODING']
    22         except KeyError:
    23             accept_encoding = ''
    24         accepts_gzip = 'gzip' in accept_encoding
    25         cache_key = 'views.decorators.cache.cache_page.%s.%s.%s' % (key_prefix, request.path, accepts_gzip)
    26         response = cache.get(cache_key, None)
    27         if response is None:
    28             response = view_func(request, *args, **kwargs)
    29             content = response.get_content_as_string(DEFAULT_CHARSET)
    30             if accepts_gzip:
    31                 content = compress_string(content)
    32                 response.content = content
    33                 response['Content-Encoding'] = 'gzip'
    34             response['ETag'] = md5.new(content).hexdigest()
    35             response['Content-Length'] = '%d' % len(content)
    36             response['Last-Modified'] = datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
    37             cache.set(cache_key, response, cache_timeout)
    38         else:
    39             # Logic is from http://simon.incutio.com/archive/2003/04/23/conditionalGet
    40             try:
    41                 if_none_match = request.META['HTTP_IF_NONE_MATCH']
    42             except KeyError:
    43                 if_none_match = None
    44             try:
    45                 if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
    46             except KeyError:
    47                 if_modified_since = None
    48             if if_none_match is None and if_modified_since is None:
    49                 pass
    50             elif if_none_match is not None and response['ETag'] != if_none_match:
    51                 pass
    52             elif if_modified_since is not None and response['Last-Modified'] != if_modified_since:
    53                 pass
    54             else:
    55                 return HttpResponseNotModified()
    56         return response
    57     return _check_cache
     10Additionally all headers from the response's Vary header will be taken into
     11account on caching (just like the middleware does).
     12"""
     13
     14from django.utils.decorators import decorator_from_middleware
     15from django.middleware.cache import CacheMiddleware
     16
     17cache_page = decorator_from_middleware(CacheMiddleware)
  • docs/cache.txt

     
    104104
    105105Pages with GET or POST parameters won't be cached.
    106106
    107 The cache middleware also makes a few more optimizations:
    108 
    109 * Sets and deals with ``ETag`` headers.
    110 * Sets the ``Content-Length`` header.
     107Additionally some cache helper headers are set up by the CacheMiddleware:
    111108* Sets the ``Last-Modified`` header to the current date/time when a fresh
    112109  (uncached) version of the page is requested.
     110* Sets the ``Expires`` header to the current date/time plus the defined
     111  cache seconds
     112* Sets the ``Cache-Control`` header to give a max age for the page
     113  from the configured cache seconds
    113114
    114 It doesn't matter where in the middleware stack you put the cache middleware.
     115There are other optimizations that could help with high traffic. Those are
     116implemented with different middlewares, though:
    115117
     118* Support for conditional GET is done by the HTTPMiddleware in
     119  django.middleware.http. This makes use of ``ETag`` and
     120  ``Last-Modified`` headers.
     121* Support for gzip-encoding responses is done by the GZipMiddleware in
     122  django.middleware.gzip.
     123
     124It's important where in the middleware stack you put the cache middleware, as it
     125needs to know what headers to vary the cache storage on. Those headers are given
     126as a list in the ``Vary`` header of the response and so the cache middleware should
     127come after all middlewares that might change this header (like the SessionMiddleware -
     128which adds the Cookie header - or the GZipMiddleware - which adds the ``Accept-Encoding``
     129header).
     130
     131Since the cache key for a request is based on the request path and those headers from
     132the request that are listed in the Vary response header, you need a way to tell for
     133your function how this Vary header should be set. There are several helper functions
     134to do this, the important ones are just plain decorators::
     135
     136    from django.utils.cache import vary_on_cookie, vary_on_headers, patch_vary_headers
     137
     138    @vary_on_cookie
     139    def index(request):
     140        ...
     141        return render_to_response('template', context)
     142
     143    @vary_on_headers('Cookie', 'Accept-Language')
     144    def overview(request):
     145        ...
     146        return render_to_response('template', context)
     147
     148    def something(request):
     149        ...
     150        response = render_to_response('template', context)
     151        patch_vary_headers(response, ['Cookie'])
     152        return response
     153
     154Of course, middleware that knows what changes need to be made to the Vary response header
     155will make them itself. That way you don't need to use vary_on_cookie if you activate the
     156SessionMiddleware, as that will already patch the Vary response header itself.
     157
    116158The per-page cache
    117159==================
    118160
     
    135177        ...
    136178
    137179This will cache the result of that view for 15 minutes. (The cache timeout is
    138 in seconds.)
     180in seconds.) The cache is used in the same way as the middleware so that cache
     181entries can be shared. There are ``gzip_page`` and ``conditional_page`` decorators
     182in correspondence to the other middlewares.
    139183
    140184The low-level cache API
    141185=======================
Back to Top