Index: django/http/__init__.py =================================================================== --- django/http/__init__.py (revision 5078) +++ django/http/__init__.py (working copy) @@ -1,9 +1,14 @@ -import os +import os, pickle from Cookie import SimpleCookie from pprint import pformat from urllib import urlencode, quote from django.utils.datastructures import MultiValueDict +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + RESERVED_CHARS="!*'();:@&=+$,/?%#[]" try: @@ -42,37 +47,316 @@ def is_secure(self): return os.environ.get("HTTPS") == "on" -def parse_file_upload(header_dict, post_data): - "Returns a tuple of (POST MultiValueDict, FILES MultiValueDict)" - import email, email.Message - from cgi import parse_header - raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()]) - raw_message += '\r\n\r\n' + post_data - msg = email.message_from_string(raw_message) - POST = MultiValueDict() - FILES = MultiValueDict() - for submessage in msg.get_payload(): - if submessage and isinstance(submessage, email.Message.Message): - name_dict = parse_header(submessage['Content-Disposition'])[1] - # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads - # or {'name': 'blah'} for POST fields - # We assume all uploaded files have a 'filename' set. - if name_dict.has_key('filename'): - assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported" - if not name_dict['filename'].strip(): - continue - # IE submits the full path, so trim everything but the basename. - # (We can't use os.path.basename because it expects Linux paths.) 
- filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:] - FILES.appendlist(name_dict['name'], { - 'filename': filename, - 'content-type': (submessage.has_key('Content-Type') and submessage['Content-Type'] or None), - 'content': submessage.get_payload(), - }) - else: - POST.appendlist(name_dict['name'], submessage.get_payload()) - return POST, FILES +def parse_file_upload(headers, input): + from django.conf import settings + # Only stream files to disk if FILE_STREAMING_DIR is set + file_upload_dir = settings.FILE_UPLOAD_DIR + streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE + + try: + parser = MultiPartParser(headers, input, file_upload_dir, streaming_min_post_size) + return parser.parse() + except MultiPartParserError, e: + return MultiValueDict({ '_file_upload_error': [e.message] }), {} + +class MultiPartParserError(Exception): + def __init__(self, message): + self.message = message + def __str__(self): + return repr(self.message) + +class MultiPartParser(object): + """ + A rfc2388 multipart/form-data parser. + + parse() reads the input stream in chunk_size chunks and returns a + tuple of (POST MultiValueDict, FILES MultiValueDict). If + file_upload_dir is defined files will be streamed to temporary + files in the specified directory. + + The FILES dictionary will have 'filename', 'content-type', + 'content' and 'content-length' entries. For streamed files it will + also have 'tmpfilename' and 'tmpfile'. The 'content' entry will + only be read from disk when referenced for streamed files. + + If the header X-Progress-ID is sent with a 32 character hex string + a temporary file with the same name will be created in + `file_upload_dir`` with a pickled { 'received', 'size' } + dictionary with the number of bytes received and the size expected + respectively. The file will be unlinked when the parser finishes. 
+ + """ + + def __init__(self, headers, input, file_upload_dir=None, streaming_min_post_size=None, chunk_size=1024*64): + try: + content_length = int(headers['Content-Length']) + except: + raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length')) + + content_type = headers.get('Content-Type') + + if not content_type or not content_type.startswith('multipart/'): + raise MultiPartParserError('Invalid Content-Type: %s' % content_type) + + ctype, opts = self.parse_header(content_type) + boundary = opts.get('boundary') + from cgi import valid_boundary + if not boundary or not valid_boundary(boundary): + raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary) + + # check if we got a valid X-Progress-ID id + progress_id = headers.get('X-Progress-ID') + if file_upload_dir and progress_id: + import re + if re.match(r'^[0-9a-zA-Z]{32}$', progress_id): + self._progress_filename = os.path.join(file_upload_dir, progress_id) + raise MultiPartParserError('Invalid X-Progress-ID: %s' % progress_id) + else: + self._progress_filename = None + self._boundary = '--' + boundary + self._input = input + self._size = content_length + self._received = 0 + self._file_upload_dir = file_upload_dir + self._chunk_size = chunk_size + self._state = 'PREAMBLE' + self._partial = '' + self._post = MultiValueDict() + self._files = MultiValueDict() + + if streaming_min_post_size is not None and content_length < streaming_min_post_size: + self._file_upload_dir = None # disable file streaming for small request + + try: + # use mx fast string search if available + from mx.TextTools import FS + self._fs = FS(self._boundary) + except ImportError: + self._fs = None + + def parse(self): + try: + self._parse() + finally: + if self._progress_filename: + try: + os.unlink(self._progress_filename) + except OSError: + pass + + return self._post, self._files + + def _parse(self): + size = self._size + + try: + while size > 0: + n = self._read(self._input, 
min(self._chunk_size, size)) + if not n: + break + size -= n + except: + # consume any remaining data so we dont generate a "Connection Reset" error + size = self._size - self._received + while size > 0: + data = self._input.read(min(self._chunk_size, size)) + size -= len(data) + raise + + def _find_boundary(self, data, start, stop): + """ + Find the next boundary and return the end of current part + and start of next part. + """ + if self._fs: + boundary = self._fs.find(data, start, stop) + else: + boundary = data.find(self._boundary, start, stop) + if boundary >= 0: + end = boundary + next = boundary + len(self._boundary) + + # backup over CRLF + if end > 0 and data[end-1] == '\n': end -= 1 + if end > 0 and data[end-1] == '\r': end -= 1 + # skip over --CRLF + if next < stop and data[next] == '-': next += 1 + if next < stop and data[next] == '-': next += 1 + if next < stop and data[next] == '\r': next += 1 + if next < stop and data[next] == '\n': next += 1 + + return True, end, next + else: + return False, stop, stop + + class TemporaryFile(object): + "A temporary file that tries to delete itself when garbage collected." + def __init__(self, dir): + import tempfile + (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir) + self.file = os.fdopen(fd, 'w+b') + self.name = name + + def __getattr__(self, name): + a = getattr(self.__dict__['file'], name) + if type(a) != type(0): + setattr(self, name, a) + return a + + def __del__(self): + try: + os.unlink(self.name) + except OSError: + pass + + class LazyContent(dict): + """ + A lazy FILES dictionary entry that reads the contents from + tmpfile only when referenced. 
+ """ + def __init__(self, data): + dict.__init__(self, data) + + def __getitem__(self, key): + if key == 'content' and not self.has_key(key): + self['tmpfile'].seek(0) + self['content'] = self['tmpfile'].read() + return dict.__getitem__(self, key) + + def _read(self, input, size): + data = input.read(size) + + if not data: + return 0 + + read_size = len(data) + self._received += read_size + + if self._partial: + data = self._partial + data + + start = 0 + stop = len(data) + + while start < stop: + boundary, end, next = self._find_boundary(data, start, stop) + + if not boundary and read_size: + # make sure we dont treat a partial boundary (and its separators) as data + stop -= len(self._boundary) + 16 + end = next = stop + if end <= start: + break # need more data + + if self._state == 'PREAMBLE': + # Preamble, just ignore it + self._state = 'HEADER' + + elif self._state == 'HEADER': + # Beginning of header, look for end of header and parse it if found. + + header_end = data.find('\r\n\r\n', start, stop) + if header_end == -1: + break # need more data + + header = data[start:header_end] + + self._fieldname = None + self._filename = None + self._content_type = None + + for line in header.split('\r\n'): + ctype, opts = self.parse_header(line) + if ctype == 'content-disposition: form-data': + self._fieldname = opts.get('name') + self._filename = opts.get('filename') + elif ctype.startswith('content-type: '): + self._content_type = ctype[14:] + + if self._filename is not None: + # cleanup filename from IE full paths: + self._filename = self._filename[self._filename.rfind("\\")+1:].strip() + + if self._filename: # ignore files without filenames + if self._file_upload_dir: + try: + self._file = self.TemporaryFile(dir=self._file_upload_dir) + except: + raise MultiPartParserError("Failed to create temporary file.") + else: + self._file = StringIO() + else: + self._file = None + self._filesize = 0 + self._state = 'FILE' + else: + self._field = StringIO() + self._state = 
'FIELD' + next = header_end + 4 + + elif self._state == 'FIELD': + # In a field, collect data until a boundary is found. + + self._field.write(data[start:end]) + if boundary: + if self._fieldname: + self._post.appendlist(self._fieldname, self._field.getvalue()) + self._field.close() + self._state = 'HEADER' + + elif self._state == 'FILE': + # In a file, collect data until a boundary is found. + + if self._file: + try: + self._file.write(data[start:end]) + except IOError, e: + raise MultiPartParserError("Failed to write to temporary file.") + self._filesize += end-start + + if self._progress_filename: + f = open(os.path.join(self._file_upload_dir, self._progress_filename), 'w') + pickle.dump({ 'received': self._received, 'size': self._size }, f) + f.close() + + if boundary: + if self._file: + if self._file_upload_dir: + self._file.seek(0) + file = self.LazyContent({ + 'filename': self._filename, + 'content-type': self._content_type, + # 'content': is read on demand + 'content-length': self._filesize, + 'tmpfilename': self._file.name, + 'tmpfile': self._file + }) + else: + file = { + 'filename': self._filename, + 'content-type': self._content_type, + 'content': self._file.getvalue(), + 'content-length': self._filesize + } + self._file.close() + + self._files.appendlist(self._fieldname, file) + + self._state = 'HEADER' + + start = next + + self._partial = data[start:] + + return read_size + + def parse_header(self, line): + from cgi import parse_header + return parse_header(line) + + + class QueryDict(MultiValueDict): """A specialized MultiValueDict that takes a query string when initialized. 
This is immutable unless you create a copy of it.""" @@ -306,3 +590,4 @@ if not host: host = request.META.get('HTTP_HOST', '') return host + Index: django/oldforms/__init__.py =================================================================== --- django/oldforms/__init__.py (revision 5078) +++ django/oldforms/__init__.py (working copy) @@ -666,17 +666,22 @@ self.validator_list = [self.isNonEmptyFile] + validator_list def isNonEmptyFile(self, field_data, all_data): - try: - content = field_data['content'] - except TypeError: + if field_data.has_key('_file_upload_error'): + raise validators.CriticalValidationError, field_data['_file_upload_error'] + if not field_data.has_key('filename'): raise validators.CriticalValidationError, gettext("No file was submitted. Check the encoding type on the form.") - if not content: + if not field_data['content-length']: raise validators.CriticalValidationError, gettext("The submitted file is empty.") def render(self, data): return '' % \ (self.get_id(), self.__class__.__name__, self.field_name) + def prepare(self, new_data): + if new_data.has_key('_file_upload_error'): + # pretend we got something in the field to raise a validation error later + new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] } + def html2python(data): if data is None: raise EmptyValue Index: django/db/models/base.py =================================================================== --- django/db/models/base.py (revision 5078) +++ django/db/models/base.py (working copy) @@ -18,6 +18,14 @@ import sys import os +# File_move_func will be used to try to move a file +# after it has been uploaded. 
+try: + import shutil + file_move_func = shutil.move +except: + file_move_func = os.rename + class ModelBase(type): "Metaclass for all models" def __new__(cls, name, bases, attrs): @@ -361,7 +369,7 @@ def _get_FIELD_size(self, field): return os.path.getsize(self._get_FIELD_filename(field)) - def _save_FIELD_file(self, field, filename, raw_contents, save=True): + def _save_FIELD_file(self, field, filename, raw_field, save=True): directory = field.get_directory_name() try: # Create the date-based directory if it doesn't exist. os.makedirs(os.path.join(settings.MEDIA_ROOT, directory)) @@ -383,10 +391,25 @@ setattr(self, field.attname, filename) full_filename = self._get_FIELD_filename(field) - fp = open(full_filename, 'wb') - fp.write(raw_contents) - fp.close() + if raw_field.has_key('tmpfilename'): + raw_field['tmpfile'].close() + try: + file_move_func(raw_field['tmpfilename'], full_filename) + except: + # fall back to just python file commands + new_file = open(full_filename, 'wb') + tmp_file = open(raw_field['tmpfilename'],'rb') + new_file.write(tmp_file.read()) + + new_file.close() + tmp_file.close() + os.remove(raw_field['tmpfilename']) + else: + fp = open(full_filename, 'wb') + fp.write(raw_field['content']) + fp.close() + # Save the width and/or height, if applicable. 
if isinstance(field, ImageField) and (field.width_field or field.height_field): from django.utils.images import get_image_dimensions Index: django/db/models/fields/__init__.py =================================================================== --- django/db/models/fields/__init__.py (revision 5078) +++ django/db/models/fields/__init__.py (working copy) @@ -636,7 +636,7 @@ setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self)) setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self)) setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self)) - setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save)) + setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save)) dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls) def delete_file(self, instance): @@ -659,9 +659,9 @@ if new_data.get(upload_field_name, False): func = getattr(new_object, 'save_%s_file' % self.name) if rel: - func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save) + func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save) else: - func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save) + func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save) def get_directory_name(self): return os.path.normpath(datetime.datetime.now().strftime(self.upload_to)) Index: django/conf/global_settings.py =================================================================== --- django/conf/global_settings.py (revision 5078) +++ django/conf/global_settings.py (working copy) @@ -240,6 +240,20 @@ # isExistingURL validator. 
URL_VALIDATOR_USER_AGENT = "Django/0.96pre (http://www.djangoproject.com)" +# The directory to place streamed file uploads. The web server needs write +# permissions on this directory. +# If this is None, streaming uploads are disabled. +FILE_UPLOAD_DIR = None + + +# The minimum size of a POST before file uploads are streamed to disk. +# Any less than this number, and the file is uploaded to memory. +# Size is in bytes. +STREAMING_MIN_POST_SIZE = 512 * (2**10) + + + + ############## # MIDDLEWARE # ############## @@ -335,3 +349,5 @@ # The list of directories to search for fixtures FIXTURE_DIRS = () + + Index: django/core/handlers/wsgi.py =================================================================== --- django/core/handlers/wsgi.py (revision 5078) +++ django/core/handlers/wsgi.py (working copy) @@ -111,7 +111,14 @@ if self.environ.get('CONTENT_TYPE', '').startswith('multipart'): header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')]) header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '') - self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data) + header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '') + header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '') + try: + self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input']) + except: + self._post, self._files = {}, {} # make sure we dont read the input stream again + raise + self._raw_post_data = None # raw data is not available for streamed multipart messages else: self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict() else: Index: django/core/handlers/modpython.py =================================================================== --- django/core/handlers/modpython.py (revision 5078) +++ django/core/handlers/modpython.py (working copy) @@ -47,7 +47,12 @@ def _load_post_and_files(self): "Populates self._post and self._files" if 
self._req.headers_in.has_key('content-type') and self._req.headers_in['content-type'].startswith('multipart'): - self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data) + self._raw_post_data = None # raw data is not available for streamed multipart messages + try: + self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req) + except: + self._post, self._files = {}, {} # make sure we dont read the input stream again + raise else: self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict() Index: tests/modeltests/test_client/views.py =================================================================== --- tests/modeltests/test_client/views.py (revision 5078) +++ tests/modeltests/test_client/views.py (working copy) @@ -44,6 +44,12 @@ return HttpResponse(t.render(c)) +def post_file_view(request): + "A view that expects a multipart post and returns a file in the context" + t = Template('File {{ file.filename }} received', name='POST Template') + c = Context({'file': request.FILES['file_file']}) + return HttpResponse(t.render(c)) + def redirect_view(request): "A view that redirects all requests to the GET view" return HttpResponseRedirect('/test_client/get_view/') Index: tests/modeltests/test_client/models.py =================================================================== --- tests/modeltests/test_client/models.py (revision 5078) +++ tests/modeltests/test_client/models.py (working copy) @@ -75,6 +75,21 @@ self.assertEqual(response.template.name, "Book template") self.assertEqual(response.content, "Blink - Malcolm Gladwell") + def test_post_file_view(self): + "POST this python file to a view" + import os, tempfile + from django.conf import settings + file = __file__.replace('.pyc', '.py') + for upload_dir in [None, tempfile.gettempdir()]: + settings.FILE_UPLOAD_DIR = upload_dir + post_data = { 'name': file, 'file': open(file) } + response = 
self.client.post('/test_client/post_file_view/', post_data) + self.failUnless('models.py' in response.context['file']['filename']) + self.failUnless(len(response.context['file']['content']) == os.path.getsize(file)) + if upload_dir: + self.failUnless(response.context['file']['tmpfilename']) + + def test_redirect(self): "GET a URL that redirects elsewhere" response = self.client.get('/test_client/redirect_view/') Index: tests/modeltests/test_client/urls.py =================================================================== --- tests/modeltests/test_client/urls.py (revision 5078) +++ tests/modeltests/test_client/urls.py (working copy) @@ -4,6 +4,7 @@ urlpatterns = patterns('', (r'^get_view/$', views.get_view), (r'^post_view/$', views.post_view), + (r'^post_file_view/$', views.post_file_view), (r'^raw_post_view/$', views.raw_post_view), (r'^redirect_view/$', views.redirect_view), (r'^form_view/$', views.form_view), Index: docs/request_response.txt =================================================================== --- docs/request_response.txt (revision 5078) +++ docs/request_response.txt (working copy) @@ -72,13 +72,25 @@ ``FILES`` A dictionary-like object containing all uploaded files. Each key in ``FILES`` is the ``name`` from the ````. Each - value in ``FILES`` is a standard Python dictionary with the following three + value in ``FILES`` is a standard Python dictionary with the following four keys: * ``filename`` -- The name of the uploaded file, as a Python string. * ``content-type`` -- The content type of the uploaded file. * ``content`` -- The raw content of the uploaded file. + * ``content-length`` -- The length of the content in bytes. + If streaming file uploads are enabled two additional keys + describing the uploaded file will be present: + + * ``tmpfilename`` -- The filename for the temporary file. + * ``tmpfile`` -- An open file object for the temporary file. + + The temporary file will be removed when the request finishes. 
+ + Note that accessing ``content`` when streaming uploads are enabled + will read the whole file into memory which may not be what you want. + Note that ``FILES`` will only contain data if the request method was POST and the ``