| 1 |
import re |
|---|
| 2 |
|
|---|
| 3 |
from django.conf import settings |
|---|
| 4 |
from django import http |
|---|
| 5 |
from django.core.mail import mail_managers |
|---|
| 6 |
from django.utils.http import urlquote |
|---|
| 7 |
from django.core import urlresolvers |
|---|
| 8 |
from django.utils.hashcompat import md5_constructor |
|---|
| 9 |
|
|---|
| 10 |
class CommonMiddleware(object):
    """
    "Common" middleware for taking care of some basic operations:

        - Forbids access to User-Agents in settings.DISALLOWED_USER_AGENTS

        - URL rewriting: Based on the APPEND_SLASH and PREPEND_WWW settings,
          this middleware appends missing slashes and/or prepends missing
          "www."s.

            - If APPEND_SLASH is set and the initial URL doesn't end with a
              slash, and it is not found in urlpatterns, a new URL is formed by
              appending a slash at the end. If this new URL is found in
              urlpatterns, then an HTTP-redirect is returned to this new URL;
              otherwise the initial URL is processed as usual.

        - Broken links: If the SEND_BROKEN_LINK_EMAILS setting is set, the
          site managers are mailed about 404 responses that carry a referrer.

        - ETags: If the USE_ETAGS setting is set, ETags will be calculated from
          the entire page content and Not Modified responses will be returned
          appropriately.
    """

    def process_request(self, request):
        """
        Check for denied User-Agents and rewrite the URL based on
        settings.APPEND_SLASH and settings.PREPEND_WWW

        Returns an HttpResponseForbidden for a disallowed User-Agent, an
        HttpResponsePermanentRedirect when the host or path had to be
        rewritten, and None when the request needs no special handling.

        Raises RuntimeError when settings.DEBUG is on and a POST request would
        be redirected to the slash-appended URL, because the POST data would
        not survive the redirect.
        """

        # Check for denied User-Agents
        if 'HTTP_USER_AGENT' in request.META:
            for user_agent_regex in settings.DISALLOWED_USER_AGENTS:
                if user_agent_regex.search(request.META['HTTP_USER_AGENT']):
                    return http.HttpResponseForbidden('<h1>Forbidden</h1>')

        # Check for a redirect based on settings.APPEND_SLASH
        # and settings.PREPEND_WWW
        host = request.get_host()
        old_url = [host, request.path]
        new_url = old_url[:]

        if (settings.PREPEND_WWW and old_url[0] and
                not old_url[0].startswith('www.')):
            new_url[0] = 'www.' + old_url[0]

        # Append a slash if APPEND_SLASH is set and the URL doesn't have a
        # trailing slash and there is no pattern for the current path
        if settings.APPEND_SLASH and (not old_url[1].endswith('/')):
            if (not _is_valid_path(request.path_info) and
                    _is_valid_path("%s/" % request.path_info)):
                new_url[1] = new_url[1] + '/'
                if settings.DEBUG and request.method == 'POST':
                    # Call form of raise: valid on both Python 2 and 3.
                    raise RuntimeError(
                        "You called this URL via POST, but the URL doesn't end "
                        "in a slash and you have APPEND_SLASH set. Django can't "
                        "redirect to the slash URL while maintaining POST data. "
                        "Change your form to point to %s%s (note the trailing "
                        "slash), or set APPEND_SLASH=False in your Django "
                        "settings." % (new_url[0], new_url[1]))

        if new_url == old_url:
            # No redirects required.
            return

        if new_url[0]:
            # A host is known, so build an absolute URL that keeps the
            # scheme of the incoming request.
            if request.is_secure():
                scheme = 'https'
            else:
                scheme = 'http'
            newurl = "%s://%s%s" % (scheme, new_url[0], urlquote(new_url[1]))
        else:
            # No host available: redirect to the quoted path only.
            newurl = urlquote(new_url[1])
        if request.GET:
            # Carry the original query string over unchanged.
            newurl += '?' + request.META['QUERY_STRING']
        return http.HttpResponsePermanentRedirect(newurl)

    def process_response(self, request, response):
        """
        Send broken link emails (for 404s) and calculate the ETag, if needed.

        Returns the response to send: the one passed in (possibly with an
        ETag header added), or an HttpResponseNotModified carrying the same
        cookies when the request's If-None-Match header equals the ETag.
        """
        if response.status_code == 404:
            if settings.SEND_BROKEN_LINK_EMAILS:
                # If the referrer was from an internal link or a non-search-engine site,
                # send a note to the managers.
                domain = request.get_host()
                referer = request.META.get('HTTP_REFERER', None)
                is_internal = _is_internal_request(domain, referer)
                path = request.get_full_path()
                if referer and not _is_ignorable_404(path) and (is_internal or '?' not in referer):
                    ua = request.META.get('HTTP_USER_AGENT', '<none>')
                    ip = request.META.get('REMOTE_ADDR', '<none>')
                    if is_internal:
                        link_kind = 'INTERNAL '
                    else:
                        link_kind = ''
                    mail_managers(
                        "Broken %slink on %s" % (link_kind, domain),
                        "Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n"
                        % (referer, path, ua, ip))
                # NOTE(review): nesting was reconstructed from a source whose
                # indentation was lost; confirm this early return belongs to
                # the SEND_BROKEN_LINK_EMAILS branch.
                return response

        # Use ETags, if requested.
        if settings.USE_ETAGS:
            if response.has_header('ETag'):
                etag = response['ETag']
            else:
                etag = '"%s"' % md5_constructor(response.content).hexdigest()
            if 200 <= response.status_code < 300 and request.META.get('HTTP_IF_NONE_MATCH') == etag:
                # The client already holds this content: answer 304 but keep
                # any cookies that were set on the original response.
                cookies = response.cookies
                response = http.HttpResponseNotModified()
                response.cookies = cookies
            else:
                response['ETag'] = etag

        return response
|---|
| 113 |
|
|---|
| 114 |
def _is_ignorable_404(uri):
    """
    Tell whether a 404 for ``uri`` should be kept from the site managers.

    Returns True when ``uri`` starts with any entry of
    settings.IGNORABLE_404_STARTS or ends with any entry of
    settings.IGNORABLE_404_ENDS, and False otherwise.
    """
    ignorable = False
    for prefix in settings.IGNORABLE_404_STARTS:
        if uri.startswith(prefix):
            ignorable = True
            break
    if not ignorable:
        # Suffixes are only consulted when no prefix matched.
        for suffix in settings.IGNORABLE_404_ENDS:
            if uri.endswith(suffix):
                ignorable = True
                break
    return ignorable
|---|
| 125 |
|
|---|
| 126 |
def _is_internal_request(domain, referer): |
|---|
| 127 |
""" |
|---|
| 128 |
Returns true if the referring URL is the same domain as the current request. |
|---|
| 129 |
""" |
|---|
| 130 |
# Different subdomains are treated as different domains. |
|---|
| 131 |
return referer is not None and re.match("^https?://%s/" % re.escape(domain), referer) |
|---|
| 132 |
|
|---|
| 133 |
def _is_valid_path(path):
    """
    Report whether ``path`` resolves against the default URL resolver.

    Returns True when urlresolvers.resolve() accepts the path and False when
    it raises Resolver404, so callers can ask "is this a match?" without
    writing their own try...except block.
    """
    try:
        urlresolvers.resolve(path)
    except urlresolvers.Resolver404:
        return False
    else:
        return True
|---|