Index: django/http/__init__.py =================================================================== --- django/http/__init__.py (revision 5722) +++ django/http/__init__.py (working copy) @@ -4,7 +4,11 @@ from urllib import urlencode from django.utils.datastructures import MultiValueDict from django.utils.encoding import smart_str, iri_to_uri, force_unicode +from django.http.multipartparser import MultiPartParser, MultiPartParserError +import re +upload_id_re = re.compile(r'^[a-fA-F0-9]{32}$') # file progress id Regular expression + RESERVED_CHARS="!*'();:@&=+$,/?%#[]" try: @@ -64,37 +68,55 @@ encoding = property(_get_encoding, _set_encoding) -def parse_file_upload(header_dict, post_data): - "Returns a tuple of (POST QueryDict, FILES MultiValueDict)" - import email, email.Message - from cgi import parse_header - raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()]) - raw_message += '\r\n\r\n' + post_data - msg = email.message_from_string(raw_message) - POST = QueryDict('', mutable=True) - FILES = MultiValueDict() - for submessage in msg.get_payload(): - if submessage and isinstance(submessage, email.Message.Message): - name_dict = parse_header(submessage['Content-Disposition'])[1] - # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads - # or {'name': 'blah'} for POST fields - # We assume all uploaded files have a 'filename' set. - if 'filename' in name_dict: - assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported" - if not name_dict['filename'].strip(): - continue - # IE submits the full path, so trim everything but the basename. - # (We can't use os.path.basename because it expects Linux paths.) 
- filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:] - FILES.appendlist(name_dict['name'], { - 'filename': filename, - 'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None, - 'content': submessage.get_payload(), - }) - else: - POST.appendlist(name_dict['name'], submessage.get_payload()) - return POST, FILES + def _get_file_progress(self): + return {} + def _set_file_progress(self,value): + pass + + def _del_file_progress(self): + pass + + file_progress = property(_get_file_progress, + _set_file_progress, + _del_file_progress) + + def _get_file_progress_from_args(self, headers, get, querystring): + """ + This parses the request for a file progress_id value. + Note that there are two distinct ways of getting the progress + ID -- header and GET. One is used primarily to attach via JavaScript + to the end of an HTML form action while the other is used for AJAX + communication. + + All progress IDs must be valid 32-digit hexadecimal numbers. + """ + if 'X-Upload-ID' in headers: + progress_id = headers['X-Upload-ID'] + elif 'progress_id' in get: + progress_id = get['progress_id'] + else: + return None + + if not upload_id_re.match(progress_id): + return None + + return progress_id + +def parse_file_upload(headers, input, request): + from django.conf import settings + + # Only stream files to disk if FILE_UPLOAD_DIR is set + file_upload_dir = settings.FILE_UPLOAD_DIR + streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE + + try: + parser = MultiPartParser(headers, input, request, file_upload_dir, streaming_min_post_size) + return parser.parse() + except MultiPartParserError, e: + return MultiValueDict({ '_file_upload_error': [e.message] }), {} + + class QueryDict(MultiValueDict): """ A specialized MultiValueDict that takes a query string when initialized. 
Index: django/oldforms/__init__.py =================================================================== --- django/oldforms/__init__.py (revision 5722) +++ django/oldforms/__init__.py (working copy) @@ -676,16 +676,21 @@ self.validator_list = [self.isNonEmptyFile] + validator_list def isNonEmptyFile(self, field_data, all_data): - try: - content = field_data['content'] - except TypeError: + if field_data.has_key('_file_upload_error'): + raise validators.CriticalValidationError, field_data['_file_upload_error'] + if not field_data.has_key('filename'): raise validators.CriticalValidationError, ugettext("No file was submitted. Check the encoding type on the form.") - if not content: + if not field_data['content-length']: raise validators.CriticalValidationError, ugettext("The submitted file is empty.") def render(self, data): return u'' % \ (self.get_id(), self.__class__.__name__, self.field_name) + + def prepare(self, new_data): + if new_data.has_key('_file_upload_error'): + # pretend we got something in the field to raise a validation error later + new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] } def html2python(data): if data is None: Index: django/db/models/base.py =================================================================== --- django/db/models/base.py (revision 5722) +++ django/db/models/base.py (working copy) @@ -13,6 +13,7 @@ from django.utils.datastructures import SortedDict from django.utils.functional import curry from django.utils.encoding import smart_str, force_unicode +from django.utils.file import file_move_safe from django.conf import settings from itertools import izip import types @@ -365,12 +366,16 @@ def _get_FIELD_size(self, field): return os.path.getsize(self._get_FIELD_filename(field)) - def _save_FIELD_file(self, field, filename, raw_contents, save=True): + def _save_FIELD_file(self, field, filename, raw_field, save=True): directory = field.get_directory_name() try: # Create the date-based directory if 
it doesn't exist. os.makedirs(os.path.join(settings.MEDIA_ROOT, directory)) except OSError: # Directory probably already exists. pass + + if filename is None: + filename = raw_field['filename'] + filename = field.get_filename(filename) # If the filename already exists, keep adding an underscore to the name of @@ -387,9 +392,16 @@ setattr(self, field.attname, filename) full_filename = self._get_FIELD_filename(field) - fp = open(full_filename, 'wb') - fp.write(raw_contents) - fp.close() + if raw_field.has_key('tmpfilename'): + raw_field['tmpfile'].close() + file_move_safe(raw_field['tmpfilename'], full_filename) + else: + from django.utils import file_locks + fp = open(full_filename, 'wb') + # exclusive lock + file_locks.lock(fp, file_locks.LOCK_EX) + fp.write(raw_field['content']) + fp.close() # Save the width and/or height, if applicable. if isinstance(field, ImageField) and (field.width_field or field.height_field): Index: django/db/models/fields/__init__.py =================================================================== --- django/db/models/fields/__init__.py (revision 5722) +++ django/db/models/fields/__init__.py (working copy) @@ -707,7 +707,8 @@ setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self)) setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self)) setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self)) - setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save)) + setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save)) + setattr(cls, 'move_%s_file' % self.name, lambda instance, raw_field, save=True: instance._save_FIELD_file(self, None, raw_field, save)) dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls) def delete_file(self, instance): @@ -730,9 +731,9 @@ if 
new_data.get(upload_field_name, False): func = getattr(new_object, 'save_%s_file' % self.name) if rel: - func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save) + func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save) else: - func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save) + func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save) def get_directory_name(self): return os.path.normpath(force_unicode(datetime.datetime.now().strftime(smart_str(self.upload_to)))) Index: django/conf/global_settings.py =================================================================== --- django/conf/global_settings.py (revision 5722) +++ django/conf/global_settings.py (working copy) @@ -247,6 +247,16 @@ from django import get_version URL_VALIDATOR_USER_AGENT = "Django/%s (http://www.djangoproject.com)" % get_version() +# The directory to place streamed file uploads. The web server needs write +# permissions on this directory. +# If this is None, streaming uploads are disabled. +FILE_UPLOAD_DIR = None + +# The minimum size of a POST before file uploads are streamed to disk. +# Any less than this number, and the file is uploaded to memory. +# Size is in bytes. 
+STREAMING_MIN_POST_SIZE = 512 * (2**10) + ############## # MIDDLEWARE # ############## Index: django/core/handlers/wsgi.py =================================================================== --- django/core/handlers/wsgi.py (revision 5722) +++ django/core/handlers/wsgi.py (working copy) @@ -76,6 +76,7 @@ self.environ = environ self.path = force_unicode(environ['PATH_INFO']) self.META = environ + self.META['UPLOAD_PROGRESS_ID'] = self._get_file_progress_id() self.method = environ['REQUEST_METHOD'].upper() def __repr__(self): @@ -112,7 +113,14 @@ if self.environ.get('CONTENT_TYPE', '').startswith('multipart'): header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')]) header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '') - self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data) + header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '') + header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '') + try: + self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'], self) + except: + self._post, self._files = {}, {} # make sure we dont read the input stream again + raise + self._raw_post_data = None # raw data is not available for streamed multipart messages else: self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict() else: @@ -168,6 +176,17 @@ buf.close() return self._raw_post_data + def _get_file_progress_id(self): + """ + Returns the Progress ID of the request, + usually provided if there is a file upload + going on. + Returns ``None`` if no progress ID is specified. 
+ """ + return self._get_file_progress_from_args(self.environ, + self.GET, + self.environ.get('QUERY_STRING', '')) + GET = property(_get_get, _set_get) POST = property(_get_post, _set_post) COOKIES = property(_get_cookies, _set_cookies) Index: django/core/handlers/modpython.py =================================================================== --- django/core/handlers/modpython.py (revision 5722) +++ django/core/handlers/modpython.py (working copy) @@ -48,7 +48,12 @@ def _load_post_and_files(self): "Populates self._post and self._files" if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'): - self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data) + self._raw_post_data = None # raw data is not available for streamed multipart messages + try: + self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req, self) + except: + self._post, self._files = {}, {} # make sure we dont read the input stream again + raise else: self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict() @@ -93,20 +98,21 @@ 'AUTH_TYPE': self._req.ap_auth_type, 'CONTENT_LENGTH': self._req.clength, # This may be wrong 'CONTENT_TYPE': self._req.content_type, # This may be wrong - 'GATEWAY_INTERFACE': 'CGI/1.1', - 'PATH_INFO': self._req.path_info, - 'PATH_TRANSLATED': None, # Not supported - 'QUERY_STRING': self._req.args, - 'REMOTE_ADDR': self._req.connection.remote_ip, - 'REMOTE_HOST': None, # DNS lookups not supported - 'REMOTE_IDENT': self._req.connection.remote_logname, - 'REMOTE_USER': self._req.user, - 'REQUEST_METHOD': self._req.method, - 'SCRIPT_NAME': None, # Not supported - 'SERVER_NAME': self._req.server.server_hostname, - 'SERVER_PORT': self._req.server.port, - 'SERVER_PROTOCOL': self._req.protocol, - 'SERVER_SOFTWARE': 'mod_python' + 'GATEWAY_INTERFACE': 'CGI/1.1', + 'PATH_INFO': self._req.path_info, + 'PATH_TRANSLATED': 
None, # Not supported + 'QUERY_STRING': self._req.args, + 'REMOTE_ADDR': self._req.connection.remote_ip, + 'REMOTE_HOST': None, # DNS lookups not supported + 'REMOTE_IDENT': self._req.connection.remote_logname, + 'REMOTE_USER': self._req.user, + 'REQUEST_METHOD': self._req.method, + 'SCRIPT_NAME': None, # Not supported + 'SERVER_NAME': self._req.server.server_hostname, + 'SERVER_PORT': self._req.server.port, + 'SERVER_PROTOCOL': self._req.protocol, + 'UPLOAD_PROGRESS_ID': self._get_file_progress_id(), + 'SERVER_SOFTWARE': 'mod_python' } for key, value in self._req.headers_in.items(): key = 'HTTP_' + key.upper().replace('-', '_') @@ -123,6 +129,17 @@ def _get_method(self): return self.META['REQUEST_METHOD'].upper() + def _get_file_progress_id(self): + """ + Returns the Progress ID of the request, + usually provided if there is a file upload + going on. + Returns ``None`` if no progress ID is specified. + """ + return self._get_file_progress_from_args(self._req.headers_in, + self.GET, + self._req.args) + GET = property(_get_get, _set_get) POST = property(_get_post, _set_post) COOKIES = property(_get_cookies, _set_cookies) Index: tests/modeltests/test_client/views.py =================================================================== --- tests/modeltests/test_client/views.py (revision 5722) +++ tests/modeltests/test_client/views.py (working copy) @@ -46,6 +46,12 @@ return HttpResponse(t.render(c)) +def post_file_view(request): + "A view that expects a multipart post and returns a file in the context" + t = Template('File {{ file.filename }} received', name='POST Template') + c = Context({'file': request.FILES['file_file']}) + return HttpResponse(t.render(c)) + def redirect_view(request): "A view that redirects all requests to the GET view" return HttpResponseRedirect('/test_client/get_view/') Index: tests/modeltests/test_client/models.py =================================================================== --- tests/modeltests/test_client/models.py (revision 5722) +++ 
tests/modeltests/test_client/models.py (working copy) @@ -4,7 +4,7 @@ The test client is a class that can act like a simple browser for testing purposes. - + It allows the user to compose GET and POST requests, and obtain the response that the server gave to those requests. The server Response objects are annotated with the details @@ -80,6 +80,20 @@ self.assertEqual(response.template.name, "Book template") self.assertEqual(response.content, "Blink - Malcolm Gladwell") + def test_post_file_view(self): + "POST this python file to a view" + import os, tempfile + from django.conf import settings + file = __file__.replace('.pyc', '.py') + for upload_dir in [None, tempfile.gettempdir()]: + settings.FILE_UPLOAD_DIR = upload_dir + post_data = { 'name': file, 'file': open(file) } + response = self.client.post('/test_client/post_file_view/', post_data) + self.failUnless('models.py' in response.context['file']['filename']) + self.failUnless(len(response.context['file']['content']) == os.path.getsize(file)) + if upload_dir: + self.failUnless(response.context['file']['tmpfilename']) + def test_redirect(self): "GET a URL that redirects elsewhere" response = self.client.get('/test_client/redirect_view/') Index: tests/modeltests/test_client/urls.py =================================================================== --- tests/modeltests/test_client/urls.py (revision 5722) +++ tests/modeltests/test_client/urls.py (working copy) @@ -5,6 +5,7 @@ urlpatterns = patterns('', (r'^get_view/$', views.get_view), (r'^post_view/$', views.post_view), + (r'^post_file_view/$', views.post_file_view), (r'^raw_post_view/$', views.raw_post_view), (r'^redirect_view/$', views.redirect_view), (r'^permanent_redirect_view/$', redirect_to, { 'url': '/test_client/get_view/' }), Index: docs/request_response.txt =================================================================== --- docs/request_response.txt (revision 5722) +++ docs/request_response.txt (working copy) @@ -72,13 +72,25 @@ ``FILES`` A 
dictionary-like object containing all uploaded files. Each key in ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each - value in ``FILES`` is a standard Python dictionary with the following three + value in ``FILES`` is a standard Python dictionary with the following four keys: * ``filename`` -- The name of the uploaded file, as a Python string. * ``content-type`` -- The content type of the uploaded file. * ``content`` -- The raw content of the uploaded file. + * ``content-length`` -- The length of the content in bytes. + If streaming file uploads are enabled, two additional keys + describing the uploaded file will be present: + + * ``tmpfilename`` -- The filename for the temporary file. + * ``tmpfile`` -- An open file object for the temporary file. + + The temporary file will be removed when the request finishes. + + Note that accessing ``content`` when streaming uploads are enabled + will read the whole file into memory, which may not be what you want. + Note that ``FILES`` will only contain data if the request method was POST and the ``