diff -r 8f50398714c1 -r ea52e616a876 django/conf/global_settings.py --- a/django/conf/global_settings.py Fri Feb 08 07:01:23 2008 -0500 +++ b/django/conf/global_settings.py Fri Feb 08 15:41:48 2008 -0500 @@ -257,6 +257,16 @@ DEFAULT_TABLESPACE = '' DEFAULT_TABLESPACE = '' DEFAULT_INDEX_TABLESPACE = '' +# The directory to place streamed file uploads. The web server needs write +# permissions on this directory. +# If this is None, streaming uploads are disabled. +FILE_UPLOAD_DIR = None + +# The minimum size of a POST before file uploads are streamed to disk. +# Any less than this number, and the file is uploaded to memory. +# Size is in bytes. +STREAMING_MIN_POST_SIZE = 512 * (2**10) + ############## # MIDDLEWARE # ############## diff -r 8f50398714c1 -r ea52e616a876 django/core/handlers/modpython.py --- a/django/core/handlers/modpython.py Fri Feb 08 07:01:23 2008 -0500 +++ b/django/core/handlers/modpython.py Fri Feb 08 15:41:48 2008 -0500 @@ -52,7 +52,12 @@ class ModPythonRequest(http.HttpRequest) def _load_post_and_files(self): "Populates self._post and self._files" if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'): - self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data) + self._raw_post_data = None # raw data is not available for streamed multipart messages + try: + self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req, self) + except: + self._post, self._files = {}, {} # make sure we dont read the input stream again + raise else: self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict() @@ -97,20 +102,21 @@ class ModPythonRequest(http.HttpRequest) 'AUTH_TYPE': self._req.ap_auth_type, 'CONTENT_LENGTH': self._req.clength, # This may be wrong 'CONTENT_TYPE': self._req.content_type, # This may be wrong - 'GATEWAY_INTERFACE': 'CGI/1.1', - 'PATH_INFO': self._req.path_info, - 
'PATH_TRANSLATED': None, # Not supported - 'QUERY_STRING': self._req.args, - 'REMOTE_ADDR': self._req.connection.remote_ip, - 'REMOTE_HOST': None, # DNS lookups not supported - 'REMOTE_IDENT': self._req.connection.remote_logname, - 'REMOTE_USER': self._req.user, - 'REQUEST_METHOD': self._req.method, - 'SCRIPT_NAME': None, # Not supported - 'SERVER_NAME': self._req.server.server_hostname, - 'SERVER_PORT': self._req.server.port, - 'SERVER_PROTOCOL': self._req.protocol, - 'SERVER_SOFTWARE': 'mod_python' + 'GATEWAY_INTERFACE': 'CGI/1.1', + 'PATH_INFO': self._req.path_info, + 'PATH_TRANSLATED': None, # Not supported + 'QUERY_STRING': self._req.args, + 'REMOTE_ADDR': self._req.connection.remote_ip, + 'REMOTE_HOST': None, # DNS lookups not supported + 'REMOTE_IDENT': self._req.connection.remote_logname, + 'REMOTE_USER': self._req.user, + 'REQUEST_METHOD': self._req.method, + 'SCRIPT_NAME': None, # Not supported + 'SERVER_NAME': self._req.server.server_hostname, + 'SERVER_PORT': self._req.server.port, + 'SERVER_PROTOCOL': self._req.protocol, + 'UPLOAD_PROGRESS_ID': self._get_file_progress_id(), + 'SERVER_SOFTWARE': 'mod_python' } for key, value in self._req.headers_in.items(): key = 'HTTP_' + key.upper().replace('-', '_') @@ -126,6 +132,17 @@ class ModPythonRequest(http.HttpRequest) def _get_method(self): return self.META['REQUEST_METHOD'].upper() + + def _get_file_progress_id(self): + """ + Returns the Progress ID of the request, + usually provided if there is a file upload + going on. + Returns ``None`` if no progress ID is specified. 
+ """ + return self._get_file_progress_from_args(self._req.headers_in, + self.GET, + self._req.args) GET = property(_get_get, _set_get) POST = property(_get_post, _set_post) diff -r 8f50398714c1 -r ea52e616a876 django/core/handlers/wsgi.py --- a/django/core/handlers/wsgi.py Fri Feb 08 07:01:23 2008 -0500 +++ b/django/core/handlers/wsgi.py Fri Feb 08 15:41:48 2008 -0500 @@ -77,6 +77,7 @@ class WSGIRequest(http.HttpRequest): self.environ = environ self.path = force_unicode(environ['PATH_INFO']) self.META = environ + self.META['UPLOAD_PROGRESS_ID'] = self._get_file_progress_id() self.method = environ['REQUEST_METHOD'].upper() def __repr__(self): @@ -114,7 +115,14 @@ class WSGIRequest(http.HttpRequest): if self.environ.get('CONTENT_TYPE', '').startswith('multipart'): header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')]) header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '') - self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data) + header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '') + header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '') + try: + self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'], self) + except: + self._post, self._files = {}, {} # make sure we dont read the input stream again + raise + self._raw_post_data = None # raw data is not available for streamed multipart messages else: self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict() else: @@ -172,6 +180,17 @@ class WSGIRequest(http.HttpRequest): buf.close() return self._raw_post_data + def _get_file_progress_id(self): + """ + Returns the Progress ID of the request, + usually provided if there is a file upload + going on. + Returns ``None`` if no progress ID is specified. 
+ """ + return self._get_file_progress_from_args(self.environ, + self.GET, + self.environ.get('QUERY_STRING', '')) + GET = property(_get_get, _set_get) POST = property(_get_post, _set_post) COOKIES = property(_get_cookies, _set_cookies) diff -r 8f50398714c1 -r ea52e616a876 django/core/validators.py --- a/django/core/validators.py Fri Feb 08 07:01:23 2008 -0500 +++ b/django/core/validators.py Fri Feb 08 15:41:48 2008 -0500 @@ -177,17 +177,17 @@ def isValidImage(field_data, all_data): from PIL import Image from cStringIO import StringIO try: - content = field_data['content'] + filename = field_data['filename'] except TypeError: raise ValidationError, _("No file was submitted. Check the encoding type on the form.") try: # load() is the only method that can spot a truncated JPEG, # but it cannot be called sanely after verify() - trial_image = Image.open(StringIO(content)) + trial_image = Image.open(field_data.get('tmpfilename') or StringIO(field_data.get('content',''))) trial_image.load() # verify() is the only method that can spot a corrupt PNG, # but it must be called immediately after the constructor - trial_image = Image.open(StringIO(content)) + trial_image = Image.open(field_data.get('tmpfilename') or StringIO(field_data.get('content',''))) trial_image.verify() except Exception: # Python Imaging Library doesn't recognize it as an image raise ValidationError, _("Upload a valid image. 
The file you uploaded was either not an image or a corrupted image.") diff -r 8f50398714c1 -r ea52e616a876 django/db/models/base.py --- a/django/db/models/base.py Fri Feb 08 07:01:23 2008 -0500 +++ b/django/db/models/base.py Fri Feb 08 15:41:48 2008 -0500 @@ -12,6 +12,7 @@ from django.dispatch import dispatcher from django.dispatch import dispatcher from django.utils.datastructures import SortedDict from django.utils.functional import curry +from django.utils.file import file_move_safe from django.utils.encoding import smart_str, force_unicode, smart_unicode from django.conf import settings from itertools import izip @@ -379,12 +380,16 @@ class Model(object): def _get_FIELD_size(self, field): return os.path.getsize(self._get_FIELD_filename(field)) - def _save_FIELD_file(self, field, filename, raw_contents, save=True): + def _save_FIELD_file(self, field, filename, raw_field, save=True): directory = field.get_directory_name() try: # Create the date-based directory if it doesn't exist. os.makedirs(os.path.join(settings.MEDIA_ROOT, directory)) except OSError: # Directory probably already exists. pass + + if filename is None: + filename = raw_field['filename'] + filename = field.get_filename(filename) # If the filename already exists, keep adding an underscore to the name of @@ -401,9 +406,16 @@ class Model(object): setattr(self, field.attname, filename) full_filename = self._get_FIELD_filename(field) - fp = open(full_filename, 'wb') - fp.write(raw_contents) - fp.close() + if raw_field.has_key('tmpfilename'): + raw_field['tmpfile'].close() + file_move_safe(raw_field['tmpfilename'], full_filename) + else: + from django.utils import file_locks + fp = open(full_filename, 'wb') + # exclusive lock + file_locks.lock(fp, file_locks.LOCK_EX) + fp.write(raw_field['content']) + fp.close() # Save the width and/or height, if applicable. 
if isinstance(field, ImageField) and (field.width_field or field.height_field): diff -r 8f50398714c1 -r ea52e616a876 django/db/models/fields/__init__.py --- a/django/db/models/fields/__init__.py Fri Feb 08 07:01:23 2008 -0500 +++ b/django/db/models/fields/__init__.py Fri Feb 08 15:41:48 2008 -0500 @@ -761,7 +761,8 @@ class FileField(Field): setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self)) setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self)) setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self)) - setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save)) + setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save)) + setattr(cls, 'move_%s_file' % self.name, lambda instance, raw_field, save=True: instance._save_FIELD_file(self, None, raw_field, save)) dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls) def delete_file(self, instance): @@ -784,9 +785,9 @@ class FileField(Field): if new_data.get(upload_field_name, False): func = getattr(new_object, 'save_%s_file' % self.name) if rel: - func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save) + func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save) else: - func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save) + func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save) def get_directory_name(self): return os.path.normpath(force_unicode(datetime.datetime.now().strftime(smart_str(self.upload_to)))) @@ -799,7 +800,7 @@ class FileField(Field): def save_form_data(self, instance, data): from django.newforms.fields import UploadedFile if data and isinstance(data, UploadedFile): - getattr(instance, 
"save_%s_file" % self.name)(data.filename, data.content, save=False) + getattr(instance, "save_%s_file" % self.name)(data.filename, data.data, save=False) def formfield(self, **kwargs): defaults = {'form_class': forms.FileField} diff -r 8f50398714c1 -r ea52e616a876 django/http/__init__.py --- a/django/http/__init__.py Fri Feb 08 07:01:23 2008 -0500 +++ b/django/http/__init__.py Fri Feb 08 15:41:48 2008 -0500 @@ -1,11 +1,16 @@ import os import os +import re from Cookie import SimpleCookie from pprint import pformat from urllib import urlencode from urlparse import urljoin +from django.http.utils import str_to_unicode +from django.http.multipartparser import MultiPartParser, MultiPartParserError from django.utils.datastructures import MultiValueDict, FileDict from django.utils.encoding import smart_str, iri_to_uri, force_unicode from utils import * + +upload_id_re = re.compile(r'^[a-fA-F0-9]{32}$') # file progress id Regular expression RESERVED_CHARS="!*'();:@&=+$,/?%#[]" @@ -79,7 +84,7 @@ class HttpRequest(object): def is_secure(self): return os.environ.get("HTTPS") == "on" - + def _set_encoding(self, val): """ Sets the encoding used for GET/POST accesses. 
If the GET or POST @@ -97,38 +102,54 @@ class HttpRequest(object): encoding = property(_get_encoding, _set_encoding) -def parse_file_upload(header_dict, post_data): - "Returns a tuple of (POST QueryDict, FILES MultiValueDict)" - import email, email.Message - from cgi import parse_header - raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()]) - raw_message += '\r\n\r\n' + post_data - msg = email.message_from_string(raw_message) - POST = QueryDict('', mutable=True) - FILES = MultiValueDict() - for submessage in msg.get_payload(): - if submessage and isinstance(submessage, email.Message.Message): - name_dict = parse_header(submessage['Content-Disposition'])[1] - # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads - # or {'name': 'blah'} for POST fields - # We assume all uploaded files have a 'filename' set. - if 'filename' in name_dict: - assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported" - if not name_dict['filename'].strip(): - continue - # IE submits the full path, so trim everything but the basename. - # (We can't use os.path.basename because that uses the server's - # directory separator, which may not be the same as the - # client's one.) - filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:] - FILES.appendlist(name_dict['name'], FileDict({ - 'filename': filename, - 'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None, - 'content': submessage.get_payload(), - })) - else: - POST.appendlist(name_dict['name'], submessage.get_payload()) - return POST, FILES + def _get_file_progress(self): + return {} + + def _set_file_progress(self,value): + pass + + def _del_file_progress(self): + pass + + file_progress = property(_get_file_progress, + _set_file_progress, + _del_file_progress) + + def _get_file_progress_from_args(self, headers, get, querystring): + """ + This parses the request for a file progress_id value. 
+ Note that there are two distinct ways of getting the progress + ID -- header and GET. One is used primarily to attach via JavaScript + to the end of an HTML form action while the other is used for AJAX + communication. + + All progress IDs must be valid 32-digit hexadecimal numbers. + """ + if 'X-Upload-ID' in headers: + progress_id = headers['X-Upload-ID'] + elif 'progress_id' in get: + progress_id = get['progress_id'] + else: + return None + + if not upload_id_re.match(progress_id): + return None + + return progress_id + +def parse_file_upload(headers, input, request): + from django.conf import settings + + # Only stream files to disk if FILE_STREAMING_DIR is set + file_upload_dir = settings.FILE_UPLOAD_DIR + streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE + + try: + parser = MultiPartParser(headers, input, request, file_upload_dir, streaming_min_post_size) + return parser.parse() + except MultiPartParserError, e: + return MultiValueDict({ '_file_upload_error': [e.message] }), {} + class QueryDict(MultiValueDict): """ @@ -413,20 +434,3 @@ class HttpResponseServerError(HttpRespon # A backwards compatible alias for HttpRequest.get_host. def get_host(request): return request.get_host() - -# It's neither necessary nor appropriate to use -# django.utils.encoding.smart_unicode for parsing URLs and form inputs. Thus, -# this slightly more restricted function. -def str_to_unicode(s, encoding): - """ - Convert basestring objects to unicode, using the given encoding. Illegaly - encoded input characters are replaced with Unicode "unknown" codepoint - (\ufffd). - - Returns any non-basestring objects without change. - """ - if isinstance(s, str): - return unicode(s, encoding, 'replace') - else: - return s - diff -r 8f50398714c1 -r ea52e616a876 django/http/multipartparser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/django/http/multipartparser.py Fri Feb 08 15:41:48 2008 -0500 @@ -0,0 +1,328 @@ +""" +MultiPart parsing for file uploads. 
+If both a progress id is sent (either through ``X-Progress-ID`` +header or ``progress_id`` GET) and ``FILE_UPLOAD_DIR`` is set +in the settings, then the file progress will be tracked using +``request.file_progress``. + +To use this feature, consider creating a middleware with an appropriate +``process_request``:: + + class FileProgressTrack(object): + def __get__(self, request, HttpRequest): + progress_id = request.META['UPLOAD_PROGRESS_ID'] + status = # get progress from progress_id here + + return status + + def __set__(self, request, new_value): + progress_id = request.META['UPLOAD_PROGRESS_ID'] + + # set the progress using progress_id here. + + # example middleware + class FileProgressExample(object): + def process_request(self, request): + request.__class__.file_progress = FileProgressTrack() + + + +""" + +__all__ = ['MultiPartParserError','MultiPartParser'] + + +from django.utils.datastructures import MultiValueDict +from django.http.utils import str_to_unicode +from django.conf import settings +import os + +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + + +class MultiPartParserError(Exception): + def __init__(self, message): + self.message = message + def __str__(self): + return repr(self.message) + +class MultiPartParser(object): + """ + A rfc2388 multipart/form-data parser. + + parse() reads the input stream in chunk_size chunks and returns a + tuple of (POST MultiValueDict, FILES MultiValueDict). If + file_upload_dir is defined files will be streamed to temporary + files in the specified directory. + + The FILES dictionary will have 'filename', 'content-type', + 'content' and 'content-length' entries. For streamed files it will + also have 'tmpfilename' and 'tmpfile'. The 'content' entry will + only be read from disk when referenced for streamed files. + + If the X-Progress-ID is sent (in one of many formats), then + object.file_progress will be given a dictionary of the progress. 
+ """ + def __init__(self, headers, input, request, file_upload_dir=None, streaming_min_post_size=None, chunk_size=1024*64): + try: + content_length = int(headers['Content-Length']) + except: + raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length')) + + content_type = headers.get('Content-Type') + + if not content_type or not content_type.startswith('multipart/'): + raise MultiPartParserError('Invalid Content-Type: %s' % content_type) + + ctype, opts = self.parse_header(content_type) + boundary = opts.get('boundary') + from cgi import valid_boundary + if not boundary or not valid_boundary(boundary): + raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary) + + progress_id = request.META['UPLOAD_PROGRESS_ID'] + + self._track_progress = file_upload_dir and progress_id # whether or not to track progress + self._boundary = '--' + boundary + self._input = input + self._size = content_length + self._received = 0 + self._file_upload_dir = file_upload_dir + self._chunk_size = chunk_size + self._state = 'PREAMBLE' + self._partial = '' + self._post = MultiValueDict() + self._files = MultiValueDict() + self._request = request + self._encoding = request.encoding or settings.DEFAULT_CHARSET + + if streaming_min_post_size is not None and content_length < streaming_min_post_size: + self._file_upload_dir = None # disable file streaming for small request + elif self._track_progress: + request.file_progress = {'state': 'starting'} + + try: + # Use mx fast string search if available. 
+ from mx.TextTools import FS + self._fs = FS(self._boundary) + except ImportError: + self._fs = None + + def parse(self): + try: + self._parse() + finally: + if self._track_progress: + self._request.file_progress = {'state': 'done'} + return self._post, self._files + + def _parse(self): + size = self._size + + try: + while size > 0: + n = self._read(self._input, min(self._chunk_size, size)) + if not n: + break + size -= n + except: + # consume any remaining data so we dont generate a "Connection Reset" error + size = self._size - self._received + while size > 0: + data = self._input.read(min(self._chunk_size, size)) + size -= len(data) + raise + + def _find_boundary(self, data, start, stop): + """ + Find the next boundary and return the end of current part + and start of next part. + """ + if self._fs: + boundary = self._fs.find(data, start, stop) + else: + boundary = data.find(self._boundary, start, stop) + if boundary >= 0: + end = boundary + next = boundary + len(self._boundary) + + # backup over CRLF + if end > 0 and data[end-1] == '\n': end -= 1 + if end > 0 and data[end-1] == '\r': end -= 1 + # skip over --CRLF + if next < stop and data[next] == '-': next += 1 + if next < stop and data[next] == '-': next += 1 + if next < stop and data[next] == '\r': next += 1 + if next < stop and data[next] == '\n': next += 1 + + return True, end, next + else: + return False, stop, stop + + class TemporaryFile(object): + "A temporary file that tries to delete itself when garbage collected." 
+ def __init__(self, dir): + import tempfile + (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir) + self.file = os.fdopen(fd, 'w+b') + self.name = name + + def __getattr__(self, name): + a = getattr(self.__dict__['file'], name) + if type(a) != type(0): + setattr(self, name, a) + return a + + def __del__(self): + try: + os.unlink(self.name) + except OSError: + pass + + class LazyContent(dict): + """ + A lazy FILES dictionary entry that reads the contents from + tmpfile only when referenced. + """ + def __init__(self, data): + dict.__init__(self, data) + + def __getitem__(self, key): + if key == 'content' and not self.has_key(key): + self['tmpfile'].seek(0) + self['content'] = self['tmpfile'].read() + return dict.__getitem__(self, key) + + def _read(self, input, size): + data = input.read(size) + + if not data: + return 0 + + read_size = len(data) + self._received += read_size + + if self._partial: + data = self._partial + data + + start = 0 + stop = len(data) + + while start < stop: + boundary, end, next = self._find_boundary(data, start, stop) + + if not boundary and read_size: + # make sure we dont treat a partial boundary (and its separators) as data + stop -= len(self._boundary) + 16 + end = next = stop + if end <= start: + break # need more data + + if self._state == 'PREAMBLE': + # Preamble, just ignore it + self._state = 'HEADER' + + elif self._state == 'HEADER': + # Beginning of header, look for end of header and parse it if found. 
+ + header_end = data.find('\r\n\r\n', start, stop) + if header_end == -1: + break # need more data + + header = data[start:header_end] + + self._fieldname = None + self._filename = None + self._content_type = None + + for line in header.split('\r\n'): + ctype, opts = self.parse_header(line) + if ctype == 'content-disposition: form-data': + self._fieldname = opts.get('name') + self._filename = opts.get('filename') + elif ctype.startswith('content-type: '): + self._content_type = ctype[14:] + + if self._filename is not None: + # cleanup filename from IE full paths: + self._filename = self._filename[self._filename.rfind("\\")+1:].strip() + + if self._filename: # ignore files without filenames + if self._file_upload_dir: + try: + self._file = self.TemporaryFile(dir=self._file_upload_dir) + except (OSError, IOError), e: + raise MultiPartParserError("Failed to create temporary file. Error was %s" % e) + else: + self._file = StringIO() + else: + self._file = None + self._filesize = 0 + self._state = 'FILE' + else: + self._field = StringIO() + self._state = 'FIELD' + next = header_end + 4 + + elif self._state == 'FIELD': + # In a field, collect data until a boundary is found. + + self._field.write(data[start:end]) + if boundary: + if self._fieldname: + self._post.appendlist(self._fieldname, str_to_unicode(self._field.getvalue(), self._encoding)) + self._field.close() + self._state = 'HEADER' + + elif self._state == 'FILE': + # In a file, collect data until a boundary is found. 
+ + if self._file: + try: + self._file.write(data[start:end]) + except IOError, e: + raise MultiPartParserError("Failed to write to temporary file.") + self._filesize += end-start + + if self._track_progress: + self._request.file_progress = {'received': self._received, + 'size': self._size, + 'state': 'uploading'} + + if boundary: + if self._file: + if self._file_upload_dir: + self._file.seek(0) + file = self.LazyContent({ + 'filename': str_to_unicode(self._filename, self._encoding), + 'content-type': self._content_type, + # 'content': is read on demand + 'content-length': self._filesize, + 'tmpfilename': self._file.name, + 'tmpfile': self._file + }) + else: + file = { + 'filename': str_to_unicode(self._filename, self._encoding), + 'content-type': self._content_type, + 'content': self._file.getvalue(), + 'content-length': self._filesize + } + self._file.close() + + self._files.appendlist(self._fieldname, file) + + self._state = 'HEADER' + + start = next + + self._partial = data[start:] + + return read_size + + def parse_header(self, line): + from cgi import parse_header + return parse_header(line) diff -r 8f50398714c1 -r ea52e616a876 django/http/utils.py --- a/django/http/utils.py Fri Feb 08 07:01:23 2008 -0500 +++ b/django/http/utils.py Fri Feb 08 15:41:48 2008 -0500 @@ -1,3 +1,19 @@ +# It's neither necessary nor appropriate to use +# django.utils.encoding.smart_unicode for parsing URLs and form inputs. Thus, +# this slightly more restricted function. +def str_to_unicode(s, encoding): + """ + Convert basestring objects to unicode, using the given encoding. Illegaly + encoded input characters are replaced with Unicode "unknown" codepoint + (\ufffd). + + Returns any non-basestring objects without change. + """ + if isinstance(s, str): + return unicode(s, encoding, 'replace') + else: + return s + """ Functions that modify an HTTP request or response in some way. 
""" diff -r 8f50398714c1 -r ea52e616a876 django/newforms/fields.py --- a/django/newforms/fields.py Fri Feb 08 07:01:23 2008 -0500 +++ b/django/newforms/fields.py Fri Feb 08 15:41:48 2008 -0500 @@ -415,9 +415,9 @@ except ImportError: class UploadedFile(StrAndUnicode): "A wrapper for files uploaded in a FileField" - def __init__(self, filename, content): + def __init__(self, filename, data): self.filename = filename - self.content = content + self.data = data def __unicode__(self): """ @@ -444,12 +444,12 @@ class FileField(Field): elif not data and initial: return initial try: - f = UploadedFile(data['filename'], data['content']) + f = UploadedFile(data['filename'], data) except TypeError: raise ValidationError(self.error_messages['invalid']) except KeyError: raise ValidationError(self.error_messages['missing']) - if not f.content: + if not f.data.get('content-length'): raise ValidationError(self.error_messages['empty']) return f @@ -473,11 +473,11 @@ class ImageField(FileField): try: # load() is the only method that can spot a truncated JPEG, # but it cannot be called sanely after verify() - trial_image = Image.open(StringIO(f.content)) + trial_image = Image.open(f.data.get('tmpfilename') or StringIO(f.data['content'])) trial_image.load() # verify() is the only method that can spot a corrupt PNG, # but it must be called immediately after the constructor - trial_image = Image.open(StringIO(f.content)) + trial_image = Image.open(f.data.get('tmpfilename') or StringIO(f.data['content'])) trial_image.verify() except Exception: # Python Imaging Library doesn't recognize it as an image raise ValidationError(self.error_messages['invalid_image']) diff -r 8f50398714c1 -r ea52e616a876 django/oldforms/__init__.py --- a/django/oldforms/__init__.py Fri Feb 08 07:01:23 2008 -0500 +++ b/django/oldforms/__init__.py Fri Feb 08 15:41:48 2008 -0500 @@ -681,16 +681,21 @@ class FileUploadField(FormField): self.validator_list = [self.isNonEmptyFile] + validator_list def 
isNonEmptyFile(self, field_data, all_data): - try: - content = field_data['content'] - except TypeError: + if field_data.has_key('_file_upload_error'): + raise validators.CriticalValidationError, field_data['_file_upload_error'] + if not field_data.has_key('filename'): raise validators.CriticalValidationError, ugettext("No file was submitted. Check the encoding type on the form.") - if not content: + if not field_data['content-length']: raise validators.CriticalValidationError, ugettext("The submitted file is empty.") def render(self, data): return mark_safe(u'' % \ (self.get_id(), self.__class__.__name__, self.field_name)) + + def prepare(self, new_data): + if new_data.has_key('_file_upload_error'): + # pretend we got something in the field to raise a validation error later + new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] } def html2python(data): if data is None: diff -r 8f50398714c1 -r ea52e616a876 django/utils/file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/django/utils/file.py Fri Feb 08 15:41:48 2008 -0500 @@ -0,0 +1,53 @@ +import os + +__all__ = ['file_move_safe'] + +try: + import shutil + file_move = shutil.move +except ImportError: + file_move = os.rename + +def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64, allow_overwrite=False): + """ + Moves a file from one location to another in the safest way possible. + + First, it tries using shutils.move, which is OS-dependent but doesn't + break with change of filesystems. Then it tries os.rename, which will + break if it encounters a change in filesystems. Lastly, it streams + it manually from one file to another in python. + + Without ``allow_overwrite``, if the destination file exists, the + file will raise an IOError. + """ + + from django.utils import file_locks + + if old_file_name == new_file_name: + # No file moving takes place. 
+ return + + if not allow_overwrite and os.path.exists(new_file_name): + raise IOError, "Django does not allow overwriting files." + + try: + file_move(old_file_name, new_file_name) + return + except OSError: # moving to another filesystem + pass + + new_file = open(new_file_name, 'wb') + # exclusive lock + file_locks.lock(new_file, file_locks.LOCK_EX) + old_file = open(old_file_name, 'rb') + current_chunk = None + + while current_chunk != '': + current_chunk = old_file.read(chunk_size) + new_file.write(current_chunk) + + new_file.close() + old_file.close() + + os.remove(old_file_name) + diff -r 8f50398714c1 -r ea52e616a876 django/utils/file_locks.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/django/utils/file_locks.py Fri Feb 08 15:41:48 2008 -0500 @@ -0,0 +1,50 @@ +""" +Locking portability by Jonathan Feignberg in python cookbook + +Example Usage:: + + from django.utils import file_locks + + f = open('./file', 'wb') + + file_locks.lock(f, file_locks.LOCK_EX) + f.write('Django') + f.close() +""" + + +import os + +__all__ = ['LOCK_EX','LOCK_SH','LOCK_NB','lock','unlock'] + +if os.name == 'nt': + import win32con + import win32file + import pywintypes + LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK + LOCK_SH = 0 + LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY + __overlapped = pywintypes.OVERLAPPED() +elif os.name == 'posix': + import fcntl + LOCK_EX = fcntl.LOCK_EX + LOCK_SH = fcntl.LOCK_SH + LOCK_NB = fcntl.LOCK_NB +else: + raise RuntimeError("Locking only defined for nt and posix platforms") + +if os.name == 'nt': + def lock(file, flags): + hfile = win32file._get_osfhandle(file.fileno()) + win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped) + + def unlock(file): + hfile = win32file._get_osfhandle(file.fileno()) + win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped) + +elif os.name =='posix': + def lock(file, flags): + fcntl.flock(file.fileno(), flags) + + def unlock(file): + fcntl.flock(file.fileno(), fcntl.LOCK_UN) diff -r 8f50398714c1 -r 
ea52e616a876 docs/forms.txt --- a/docs/forms.txt Fri Feb 08 07:01:23 2008 -0500 +++ b/docs/forms.txt Fri Feb 08 15:41:48 2008 -0500 @@ -475,6 +475,19 @@ this:: new_data = request.POST.copy() new_data.update(request.FILES) +Streaming file uploads. +----------------------- + +File uploads will be read into memory by default. This works fine for +small to medium-sized uploads (from 1MB to 100MB depending on your +setup and usage). If you want to support larger uploads you can enable +upload streaming where only a small part of the file will be in memory +at any time. To do this you need to specify the ``FILE_UPLOAD_DIR`` +setting (see the settings_ document for more details). + +See `request object`_ for more details about ``request.FILES`` objects +with streaming file uploads enabled. + Validators ========== @@ -698,3 +711,4 @@ fails. If no message is passed in, a def .. _`generic views`: ../generic_views/ .. _`models API`: ../model-api/ .. _settings: ../settings/ +.. _request object: ../request_response/#httprequest-objects diff -r 8f50398714c1 -r ea52e616a876 docs/request_response.txt --- a/docs/request_response.txt Fri Feb 08 07:01:23 2008 -0500 +++ b/docs/request_response.txt Fri Feb 08 15:41:48 2008 -0500 @@ -82,12 +82,24 @@ All attributes except ``session`` should ``FILES`` A dictionary-like object containing all uploaded files. Each key in ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each - value in ``FILES`` is a standard Python dictionary with the following three + value in ``FILES`` is a standard Python dictionary with the following four keys: * ``filename`` -- The name of the uploaded file, as a Python string. * ``content-type`` -- The content type of the uploaded file. * ``content`` -- The raw content of the uploaded file. + * ``content-length`` -- The length of the content in bytes. + + If streaming file uploads are enabled, two additional keys + describing the uploaded file will be present: + + * ``tmpfilename`` -- The filename for the temporary file. 
+ * ``tmpfile`` -- An open file object for the temporary file. + + The temporary file will be removed when the request finishes. + + Note that accessing ``content`` when streaming uploads are enabled + will read the whole file into memory, which may not be what you want. Note that ``FILES`` will only contain data if the request method was POST and the ``<form>``
that posted to the request had diff -r 8f50398714c1 -r ea52e616a876 docs/settings.txt --- a/docs/settings.txt Fri Feb 08 07:01:23 2008 -0500 +++ b/docs/settings.txt Fri Feb 08 15:41:48 2008 -0500 @@ -521,6 +521,15 @@ these paths should use Unix-style forwar .. _Testing Django Applications: ../testing/ +FILE_UPLOAD_DIR +--------------- + +Default: ``None`` + +Path to a directory where temporary files should be written during +file uploads. Leaving this as ``None`` will disable streaming file uploads, +and cause all uploaded files to be stored (temporarily) in memory. + IGNORABLE_404_ENDS ------------------ @@ -888,6 +897,16 @@ See the `site framework docs`_. .. _site framework docs: ../sites/ +STREAMING_MIN_POST_SIZE +----------------------- + +Default: 524288 (``512*1024``) + +An integer specifying the minimum number of bytes that has to be +received (in a POST) for file upload streaming to take place. Any +request smaller than this will be handled in memory. +Note: ``FILE_UPLOAD_DIR`` has to be defined to enable streaming. + TEMPLATE_CONTEXT_PROCESSORS --------------------------- diff -r 8f50398714c1 -r ea52e616a876 tests/modeltests/model_forms/models.py --- a/tests/modeltests/model_forms/models.py Fri Feb 08 07:01:23 2008 -0500 +++ b/tests/modeltests/model_forms/models.py Fri Feb 08 15:41:48 2008 -0500 @@ -736,7 +736,7 @@ False # Upload a file and ensure it all works as expected. ->>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': {'filename': 'test1.txt', 'content': 'hello world'}}) +>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': {'filename': 'test1.txt', 'content': 'hello world', 'content-length':len('hello world')}}) >>> f.is_valid() True >>> type(f.cleaned_data['file']) @@ -763,7 +763,7 @@ u'.../test1.txt' # Override the file by uploading a new one.
->>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': {'filename': 'test2.txt', 'content': 'hello world'}}, instance=instance) +>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': {'filename': 'test2.txt', 'content': 'hello world', 'content-length':len('hello world')}}, instance=instance) >>> f.is_valid() True >>> instance = f.save() @@ -782,7 +782,7 @@ True >>> instance.file '' ->>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': {'filename': 'test3.txt', 'content': 'hello world'}}, instance=instance) +>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': {'filename': 'test3.txt', 'content': 'hello world', 'content-length':len('hello world')}}, instance=instance) >>> f.is_valid() True >>> instance = f.save() @@ -802,7 +802,7 @@ u'.../test3.txt' >>> image_data = open(os.path.join(os.path.dirname(__file__), "test.png")).read() ->>> f = ImageFileForm(data={'description': u'An image'}, files={'image': {'filename': 'test.png', 'content': image_data}}) +>>> f = ImageFileForm(data={'description': u'An image'}, files={'image': {'filename': 'test.png', 'content': image_data, 'content-length':len(image_data)}}) >>> f.is_valid() True >>> type(f.cleaned_data['image']) @@ -829,7 +829,7 @@ u'.../test.png' # Override the file by uploading a new one.
->>> f = ImageFileForm(data={'description': u'Changed it'}, files={'image': {'filename': 'test2.png', 'content': image_data}}, instance=instance) +>>> f = ImageFileForm(data={'description': u'Changed it'}, files={'image': {'filename': 'test2.png', 'content': image_data, 'content-length':len(image_data)}}, instance=instance) >>> f.is_valid() True >>> instance = f.save() @@ -848,7 +848,7 @@ True >>> instance.image '' ->>> f = ImageFileForm(data={'description': u'And a final one'}, files={'image': {'filename': 'test3.png', 'content': image_data}}, instance=instance) +>>> f = ImageFileForm(data={'description': u'And a final one'}, files={'image': {'filename': 'test3.png', 'content': image_data, 'content-length':len(image_data)}}, instance=instance) >>> f.is_valid() True >>> instance = f.save() diff -r 8f50398714c1 -r ea52e616a876 tests/modeltests/test_client/models.py --- a/tests/modeltests/test_client/models.py Fri Feb 08 07:01:23 2008 -0500 +++ b/tests/modeltests/test_client/models.py Fri Feb 08 15:41:48 2008 -0500 @@ -79,6 +79,21 @@ class ClientTest(TestCase): self.assertEqual(response.status_code, 200) self.assertEqual(response.template.name, "Book template") self.assertEqual(response.content, "Blink - Malcolm Gladwell") + + def test_post_file_view(self): + "POST this python file to a view" + import os, tempfile + from django.conf import settings + file = __file__.replace('.pyc', '.py') + for upload_dir, streaming_size in [(None,512*1000), (tempfile.gettempdir(), 1)]: + settings.FILE_UPLOAD_DIR = upload_dir + settings.STREAMING_MIN_POST_SIZE = streaming_size + post_data = { 'name': file, 'file_file': open(file) } + response = self.client.post('/test_client/post_file_view/', post_data) + self.failUnless('models.py' in response.context['file']['filename']) + self.failUnless(len(response.context['file']['content']) == os.path.getsize(file)) + if upload_dir: + self.failUnless(response.context['file']['tmpfilename']) def test_redirect(self): "GET a URL that redirects
elsewhere" diff -r 8f50398714c1 -r ea52e616a876 tests/modeltests/test_client/urls.py --- a/tests/modeltests/test_client/urls.py Fri Feb 08 07:01:23 2008 -0500 +++ b/tests/modeltests/test_client/urls.py Fri Feb 08 15:41:48 2008 -0500 @@ -5,6 +5,7 @@ urlpatterns = patterns('', urlpatterns = patterns('', (r'^get_view/$', views.get_view), (r'^post_view/$', views.post_view), + (r'^post_file_view/$', views.post_file_view), (r'^raw_post_view/$', views.raw_post_view), (r'^redirect_view/$', views.redirect_view), (r'^permanent_redirect_view/$', redirect_to, { 'url': '/test_client/get_view/' }), diff -r 8f50398714c1 -r ea52e616a876 tests/modeltests/test_client/views.py --- a/tests/modeltests/test_client/views.py Fri Feb 08 07:01:23 2008 -0500 +++ b/tests/modeltests/test_client/views.py Fri Feb 08 15:41:48 2008 -0500 @@ -45,6 +45,12 @@ def raw_post_view(request): t = Template("GET request.", name="Book GET template") c = Context() + return HttpResponse(t.render(c)) + +def post_file_view(request): + "A view that expects a multipart post and returns a file in the context" + t = Template('File {{ file.filename }} received', name='POST Template') + c = Context({'file': request.FILES['file_file']}) return HttpResponse(t.render(c)) def redirect_view(request): diff -r 8f50398714c1 -r ea52e616a876 tests/regressiontests/forms/fields.py --- a/tests/regressiontests/forms/fields.py Fri Feb 08 07:01:23 2008 -0500 +++ b/tests/regressiontests/forms/fields.py Fri Feb 08 15:41:48 2008 -0500 @@ -788,7 +788,7 @@ Traceback (most recent call last): ... ValidationError: [u'No file was submitted. Check the encoding type on the form.'] ->>> f.clean({'filename': 'name', 'content': None}) +>>> f.clean({'filename': 'name', 'content': None, 'content-length': 0}) Traceback (most recent call last): ... ValidationError: [u'The submitted file is empty.'] @@ -798,10 +798,10 @@ Traceback (most recent call last): ... 
ValidationError: [u'The submitted file is empty.'] ->>> type(f.clean({'filename': 'name', 'content': 'Some File Content'})) +>>> type(f.clean({'filename': 'name', 'content': 'Some File Content', 'content-length': len('Some File Content')})) ->>> type(f.clean({'filename': 'name', 'content': 'Some File Content'}, 'files/test4.pdf')) +>>> type(f.clean({'filename': 'name', 'content': 'Some File Content', 'content-length': len('Some File Content')}, 'files/test4.pdf')) # URLField ################################################################## diff -r 8f50398714c1 -r ea52e616a876 tests/regressiontests/forms/forms.py --- a/tests/regressiontests/forms/forms.py Fri Feb 08 07:01:23 2008 -0500 +++ b/tests/regressiontests/forms/forms.py Fri Feb 08 15:41:48 2008 -0500 @@ -1410,7 +1410,7 @@ not request.POST. >>> print f File1:
  • No file was submitted. Check the encoding type on the form.
->>> f = FileForm(data={}, files={'file1': {'filename': 'name', 'content':'some content'}}, auto_id=False) +>>> f = FileForm(data={}, files={'file1': {'filename': 'name', 'content':'some content', 'content-length': len('some content')}}, auto_id=False) >>> print f File1: >>> f.is_valid()