Code

Ticket #2070: 5127_file_uploads_no_streaming_fixed.diff

File 5127_file_uploads_no_streaming_fixed.diff, 39.2 KB (added by SmileyChris, 7 years ago)

There was an error uploading large files with streaming turned off

Line 
1Index: django/conf/global_settings.py
2===================================================================
3--- django/conf/global_settings.py      (revision 5127)
4+++ django/conf/global_settings.py      (working copy)
5@@ -240,6 +240,16 @@
6 # isExistingURL validator.
7 URL_VALIDATOR_USER_AGENT = "Django/0.96pre (http://www.djangoproject.com)"
8 
9+# The directory to place streamed file uploads. The web server needs write
10+# permissions on this directory.
11+# If this is None, streaming uploads are disabled.
12+FILE_UPLOAD_DIR = None
13+
14+# The minimum size of a POST before file uploads are streamed to disk.
15+# POSTs smaller than this are read into memory instead.
16+# Size is in bytes.
17+STREAMING_MIN_POST_SIZE = 512 * (2**10)
18+
19 ##############
20 # MIDDLEWARE #
21 ##############
22Index: django/core/handlers/base.py
23===================================================================
24--- django/core/handlers/base.py        (revision 5127)
25+++ django/core/handlers/base.py        (working copy)
26@@ -5,7 +5,7 @@
27 
28 class BaseHandler(object):
29     def __init__(self):
30-        self._request_middleware = self._view_middleware = self._response_middleware = self._exception_middleware = None
31+        self._upload_middleware = self._request_middleware = self._view_middleware = self._response_middleware = self._exception_middleware = None
32 
33     def load_middleware(self):
34         """
35@@ -19,6 +19,7 @@
36         self._view_middleware = []
37         self._response_middleware = []
38         self._exception_middleware = []
39+        self._upload_middleware = []
40         for middleware_path in settings.MIDDLEWARE_CLASSES:
41             try:
42                 dot = middleware_path.rindex('.')
43@@ -47,13 +48,28 @@
44                 self._response_middleware.insert(0, mw_instance.process_response)
45             if hasattr(mw_instance, 'process_exception'):
46                 self._exception_middleware.insert(0, mw_instance.process_exception)
47+            if hasattr(mw_instance, 'process_upload'):
48+                self._upload_middleware.append(mw_instance.process_upload)
49 
50+    def file_progress_descriptor(self, request):
51+        """
52+        Returns a descriptor that manages the file_progress
53+        """
54+        for mw_call in self._upload_middleware:
55+            result = mw_call(http.MultiPartParserError)
56+            if result != None:
57+                return result
58+        return http.DefaultFileProgressDescriptor(http.MultiPartParserError)
59+
60     def get_response(self, request):
61         "Returns an HttpResponse object for the given HttpRequest"
62         from django.core import exceptions, urlresolvers
63         from django.core.mail import mail_admins
64         from django.conf import settings
65 
66+        # Add file_progress descriptor
67+        request._file_progress = self.file_progress_descriptor(request)
68+
69         # Apply request middleware
70         for middleware_method in self._request_middleware:
71             response = middleware_method(request)
72Index: django/core/handlers/modpython.py
73===================================================================
74--- django/core/handlers/modpython.py   (revision 5127)
75+++ django/core/handlers/modpython.py   (working copy)
76@@ -47,7 +47,12 @@
77     def _load_post_and_files(self):
78         "Populates self._post and self._files"
79         if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
80-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
81+            self._raw_post_data = None # raw data is not available for streamed multipart messages
82+            try:
83+                self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req, self)
84+            except:
85+                self._post, self._files = {}, {} # make sure we dont read the input stream again
86+                raise
87         else:
88             self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
89 
90@@ -92,20 +97,21 @@
91                 'AUTH_TYPE':         self._req.ap_auth_type,
92                 'CONTENT_LENGTH':    self._req.clength, # This may be wrong
93                 'CONTENT_TYPE':      self._req.content_type, # This may be wrong
94-                'GATEWAY_INTERFACE': 'CGI/1.1',
95-                'PATH_INFO':         self._req.path_info,
96-                'PATH_TRANSLATED':   None, # Not supported
97-                'QUERY_STRING':      self._req.args,
98-                'REMOTE_ADDR':       self._req.connection.remote_ip,
99-                'REMOTE_HOST':       None, # DNS lookups not supported
100-                'REMOTE_IDENT':      self._req.connection.remote_logname,
101-                'REMOTE_USER':       self._req.user,
102-                'REQUEST_METHOD':    self._req.method,
103-                'SCRIPT_NAME':       None, # Not supported
104-                'SERVER_NAME':       self._req.server.server_hostname,
105-                'SERVER_PORT':       self._req.server.port,
106-                'SERVER_PROTOCOL':   self._req.protocol,
107-                'SERVER_SOFTWARE':   'mod_python'
108+                'GATEWAY_INTERFACE':  'CGI/1.1',
109+                'PATH_INFO':          self._req.path_info,
110+                'PATH_TRANSLATED':    None, # Not supported
111+                'QUERY_STRING':       self._req.args,
112+                'REMOTE_ADDR':        self._req.connection.remote_ip,
113+                'REMOTE_HOST':        None, # DNS lookups not supported
114+                'REMOTE_IDENT':       self._req.connection.remote_logname,
115+                'REMOTE_USER':        self._req.user,
116+                'REQUEST_METHOD':     self._req.method,
117+                'SCRIPT_NAME':        None, # Not supported
118+                'SERVER_NAME':        self._req.server.server_hostname,
119+                'SERVER_PORT':        self._req.server.port,
120+                'SERVER_PROTOCOL':    self._req.protocol,
121+                'UPLOAD_PROGRESS_ID': self._get_file_progress_id(),
122+                'SERVER_SOFTWARE':    'mod_python'
123             }
124             for key, value in self._req.headers_in.items():
125                 key = 'HTTP_' + key.upper().replace('-', '_')
126@@ -122,6 +128,17 @@
127     def _get_method(self):
128         return self.META['REQUEST_METHOD'].upper()
129 
130+    def _get_file_progress_id(self):
131+        """
132+        Returns the Progress ID of the request,
133+        usually provided if there is a file upload
134+        going on.
135+        Returns ``None`` if no progress ID is specified.
136+        """
137+        return self._get_file_progress_from_args(self._req.headers_in,
138+                                                 self.GET,
139+                                                 self._req.args)
140+
141     GET = property(_get_get, _set_get)
142     POST = property(_get_post, _set_post)
143     COOKIES = property(_get_cookies, _set_cookies)
144Index: django/core/handlers/wsgi.py
145===================================================================
146--- django/core/handlers/wsgi.py        (revision 5127)
147+++ django/core/handlers/wsgi.py        (working copy)
148@@ -75,6 +75,7 @@
149         self.environ = environ
150         self.path = environ['PATH_INFO']
151         self.META = environ
152+        self.META['UPLOAD_PROGRESS_ID'] = self._get_file_progress_id()
153         self.method = environ['REQUEST_METHOD'].upper()
154 
155     def __repr__(self):
156@@ -111,7 +112,14 @@
157             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
158                 header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
159                 header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
160-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
161+                header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '')
162+                header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '')
163+                try:
164+                    self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'], self)
165+                except:
166+                    self._post, self._files = {}, {} # make sure we dont read the input stream again
167+                    raise
168+                self._raw_post_data = None # raw data is not available for streamed multipart messages
169             else:
170                 self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
171         else:
172@@ -167,6 +175,17 @@
173             buf.close()
174             return self._raw_post_data
175 
176+    def _get_file_progress_id(self):
177+        """
178+        Returns the Progress ID of the request,
179+        usually provided if there is a file upload
180+        going on.
181+        Returns ``None`` if no progress ID is specified.
182+        """
183+        return self._get_file_progress_from_args(self.environ,
184+                                                 self.GET,
185+                                                 self.environ.get('QUERY_STRING', ''))
186+
187     GET = property(_get_get, _set_get)
188     POST = property(_get_post, _set_post)
189     COOKIES = property(_get_cookies, _set_cookies)
190Index: django/db/models/base.py
191===================================================================
192--- django/db/models/base.py    (revision 5127)
193+++ django/db/models/base.py    (working copy)
194@@ -12,12 +12,14 @@
195 from django.dispatch import dispatcher
196 from django.utils.datastructures import SortedDict
197 from django.utils.functional import curry
198+from django.utils.file import file_move_safe
199 from django.conf import settings
200 from itertools import izip
201 import types
202 import sys
203 import os
204 
205+               
206 class ModelBase(type):
207     "Metaclass for all models"
208     def __new__(cls, name, bases, attrs):
209@@ -361,7 +363,7 @@
210     def _get_FIELD_size(self, field):
211         return os.path.getsize(self._get_FIELD_filename(field))
212 
213-    def _save_FIELD_file(self, field, filename, raw_contents, save=True):
214+    def _save_FIELD_file(self, field, filename, raw_field, save=True):
215         directory = field.get_directory_name()
216         try: # Create the date-based directory if it doesn't exist.
217             os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
218@@ -383,9 +385,13 @@
219         setattr(self, field.attname, filename)
220 
221         full_filename = self._get_FIELD_filename(field)
222-        fp = open(full_filename, 'wb')
223-        fp.write(raw_contents)
224-        fp.close()
225+        if raw_field.has_key('tmpfilename'):
226+            raw_field['tmpfile'].close()
227+            file_move_safe(raw_field['tmpfilename'], full_filename)
228+        else:
229+            fp = open(full_filename, 'wb')
230+            fp.write(raw_field['content'])
231+            fp.close()
232 
233         # Save the width and/or height, if applicable.
234         if isinstance(field, ImageField) and (field.width_field or field.height_field):
235Index: django/db/models/fields/__init__.py
236===================================================================
237--- django/db/models/fields/__init__.py (revision 5127)
238+++ django/db/models/fields/__init__.py (working copy)
239@@ -638,7 +638,7 @@
240         setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
241         setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
242         setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
243-        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
244+        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
245         dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
246 
247     def delete_file(self, instance):
248@@ -661,9 +661,9 @@
249         if new_data.get(upload_field_name, False):
250             func = getattr(new_object, 'save_%s_file' % self.name)
251             if rel:
252-                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
253+                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save)
254             else:
255-                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
256+                func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save)
257 
258     def get_directory_name(self):
259         return os.path.normpath(datetime.datetime.now().strftime(self.upload_to))
260Index: django/http/__init__.py
261===================================================================
262--- django/http/__init__.py     (revision 5127)
263+++ django/http/__init__.py     (working copy)
264@@ -1,9 +1,16 @@
265-import os
266+import os, pickle
267 from Cookie import SimpleCookie
268 from pprint import pformat
269 from urllib import urlencode, quote
270 from django.utils.datastructures import MultiValueDict
271+from django.http.file_descriptor import DefaultFileProgressDescriptor
272+import re
273 
274+try:
275+    from cStringIO import StringIO
276+except ImportError:
277+    from StringIO import StringIO
278+
279 RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
280 
281 try:
282@@ -12,11 +19,28 @@
283 except ImportError:
284     from cgi import parse_qsl
285 
286+class MetaFileProgressDescriptor(object):
287+    """
288+    This descriptor allows other descriptors to
289+    be loaded in runtime to a request instance.
290+    """
291+    def __get__(self, request, *args, **kwargs):
292+        return request._file_progress.__get__(request, *args, **kwargs)
293+
294+    def __set__(self, request, *args, **kwargs):
295+        return request._file_progress.__set__(request, *args, **kwargs)
296+
297+    def __delete__(self, request, *args, **kwargs):
298+        return request._file_progress.__delete__(request, *args, **kwargs)
299+
300 class Http404(Exception):
301     pass
302 
303 class HttpRequest(object):
304     "A basic HTTP request"
305+    upload_id_re = re.compile(r'^[a-fA-F0-9]{32}$')
306+    file_progress = MetaFileProgressDescriptor()
307+
308     def __init__(self):
309         self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {}
310         self.path = ''
311@@ -42,37 +66,331 @@
312     def is_secure(self):
313         return os.environ.get("HTTPS") == "on"
314 
315-def parse_file_upload(header_dict, post_data):
316-    "Returns a tuple of (POST MultiValueDict, FILES MultiValueDict)"
317-    import email, email.Message
318-    from cgi import parse_header
319-    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
320-    raw_message += '\r\n\r\n' + post_data
321-    msg = email.message_from_string(raw_message)
322-    POST = MultiValueDict()
323-    FILES = MultiValueDict()
324-    for submessage in msg.get_payload():
325-        if submessage and isinstance(submessage, email.Message.Message):
326-            name_dict = parse_header(submessage['Content-Disposition'])[1]
327-            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
328-            # or {'name': 'blah'} for POST fields
329-            # We assume all uploaded files have a 'filename' set.
330-            if 'filename' in name_dict:
331-                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
332-                if not name_dict['filename'].strip():
333-                    continue
334-                # IE submits the full path, so trim everything but the basename.
335-                # (We can't use os.path.basename because it expects Linux paths.)
336-                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
337-                FILES.appendlist(name_dict['name'], {
338-                    'filename': filename,
339-                    'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
340-                    'content': submessage.get_payload(),
341-                })
342-            else:
343-                POST.appendlist(name_dict['name'], submessage.get_payload())
344-    return POST, FILES
345+    def _get_file_progress_from_args(self, headers, get, querystring):
346+        if 'X-Upload-ID' in headers:
347+            progress_id = headers['X-Upload-ID']
348+        elif 'X-Progress-ID' in headers:
349+            progress_id = headers['X-Progress-ID']
350+        elif 'HTTP_X_UPLOAD_ID' in headers:
351+            progress_id = headers['HTTP_X_UPLOAD_ID']
352+        elif 'HTTP_X_PROGRESS_ID' in headers:
353+            progress_id = headers['HTTP_X_PROGRESS_ID']
354+        elif 'upload_id' in get:
355+            progress_id = get['upload_id']
356+        elif 'progress_id' in get:
357+            progress_id = get['progress_id']
358+        elif querystring and len(querystring.strip()) == 32:
359+            progress_id = querystring
360+        else:
361+            return None
362 
363+        if not self.upload_id_re.match(progress_id):
364+            return None
365+
366+        return progress_id
367+
368+def parse_file_upload(headers, input, request):
369+    from django.conf import settings
370+
369+    # Only stream files to disk if FILE_UPLOAD_DIR is set
372+    file_upload_dir = settings.FILE_UPLOAD_DIR
373+    streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE
374+
375+    try:
376+        parser = MultiPartParser(headers, input, request, file_upload_dir, streaming_min_post_size)
377+        return parser.parse()
378+    except MultiPartParserError, e:
379+        return MultiValueDict({ '_file_upload_error': [e.message] }), {}
380+
381+class MultiPartParserError(Exception):
382+    def __init__(self, message):
383+        self.message = message
384+    def __str__(self):
385+        return repr(self.message)
386+
387+class MultiPartParser(object):
388+    """
389+    An RFC 2388 multipart/form-data parser.
390+   
391+    parse() reads the input stream in chunk_size chunks and returns a
392+    tuple of (POST MultiValueDict, FILES MultiValueDict). If
393+    file_upload_dir is defined files will be streamed to temporary
394+    files in the specified directory.
395+
396+    The FILES dictionary will have 'filename', 'content-type',
397+    'content' and 'content-length' entries. For streamed files it will
398+    also have 'tmpfilename' and 'tmpfile'. The 'content' entry will
399+    only be read from disk when referenced for streamed files.
400+
401+    If the header X-Progress-ID is sent with a 32 character hex string
402+    a temporary file with the same name will be created in
403+    ``file_upload_dir`` with a pickled { 'received', 'size' }
404+    dictionary with the number of bytes received and the size expected
405+    respectively. The file will be unlinked when the parser finishes.
406+    """
407+    def __init__(self, headers, input, request, file_upload_dir=None, streaming_min_post_size=None, chunk_size=1024*64):
408+        try:
409+            content_length = int(headers['Content-Length'])
410+        except:
411+            raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length'))
412+
413+        content_type = headers.get('Content-Type')
414+
415+        if not content_type or not content_type.startswith('multipart/'):
416+            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
417+           
418+        ctype, opts = self.parse_header(content_type)
419+        boundary = opts.get('boundary')
420+        from cgi import valid_boundary
421+        if not boundary or not valid_boundary(boundary):
422+            raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary)
423+
424+        progress_id = request.META['UPLOAD_PROGRESS_ID']
425+
426+        if file_upload_dir and progress_id:
427+            self._progress_filename = os.path.join(file_upload_dir, progress_id)
428+        else:
429+            self._progress_filename = None
430+        self._boundary = '--' + boundary
431+        self._input = input
432+        self._size = content_length
433+        self._received = 0
434+        self._file_upload_dir = file_upload_dir
435+        self._chunk_size = chunk_size
436+        self._state = 'PREAMBLE'
437+        self._partial = ''
438+        self._post = MultiValueDict()
439+        self._files = MultiValueDict()
440+        self._request = request
441+
442+        if streaming_min_post_size is not None and content_length < streaming_min_post_size:
443+            self._file_upload_dir = None # disable file streaming for small request
444+        elif self._progress_filename:
445+            request.file_progress = {'state': 'starting'}
446+
447+        try:
448+            # Use mx fast string search if available.
449+            from mx.TextTools import FS
450+            self._fs = FS(self._boundary)
451+        except ImportError:
452+            self._fs = None
453+
454+    def parse(self):
455+        try:
456+            self._parse()
457+        finally:
458+            if self._progress_filename:
459+                self._request.file_progress = {'state': 'done'}
460+        return self._post, self._files
461+
462+    def _parse(self):
463+        size = self._size
464+
465+        try:
466+            while size > 0:
467+                n = self._read(self._input, min(self._chunk_size, size))
468+                if not n:
469+                    break
470+                size -= n
471+        except:
472+            # consume any remaining data so we don't generate a "Connection Reset" error
473+            size = self._size - self._received
474+            while size > 0:
475+                data = self._input.read(min(self._chunk_size, size))
476+                size -= len(data)
477+            raise
478+
479+    def _find_boundary(self, data, start, stop):
480+        """
481+        Find the next boundary and return the end of current part
482+        and start of next part.
483+        """
484+        if self._fs:
485+            boundary = self._fs.find(data, start, stop)
486+        else:
487+            boundary = data.find(self._boundary, start, stop)
488+        if boundary >= 0:
489+            end = boundary
490+            next = boundary + len(self._boundary)
491+
492+            # backup over CRLF
493+            if end > 0 and data[end-1] == '\n': end -= 1
494+            if end > 0 and data[end-1] == '\r': end -= 1
495+            # skip over --CRLF
496+            if next < stop and data[next] == '-': next += 1
497+            if next < stop and data[next] == '-': next += 1
498+            if next < stop and data[next] == '\r': next += 1
499+            if next < stop and data[next] == '\n': next += 1
500+
501+            return True, end, next
502+        else:
503+            return False, stop, stop
504+
505+    class TemporaryFile(object):
506+        "A temporary file that tries to delete itself when garbage collected."
507+        def __init__(self, dir):
508+            import tempfile
509+            (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
510+            self.file = os.fdopen(fd, 'w+b')
511+            self.name = name
512+
513+        def __getattr__(self, name):
514+            a = getattr(self.__dict__['file'], name)
515+            if type(a) != type(0):
516+                setattr(self, name, a)
517+            return a
518+
519+        def __del__(self):
520+            try:
521+                os.unlink(self.name)
522+            except OSError:
523+                pass
524+
525+    class LazyContent(dict):
526+        """
527+        A lazy FILES dictionary entry that reads the contents from
528+        tmpfile only when referenced.
529+        """
530+        def __init__(self, data):
531+            dict.__init__(self, data)
532+
533+        def __getitem__(self, key):
534+            if key == 'content' and not self.has_key(key):
535+                self['tmpfile'].seek(0)
536+                self['content'] = self['tmpfile'].read()
537+            return dict.__getitem__(self, key)
538+
539+    def _read(self, input, size):
540+        data = input.read(size)
541+
542+        if not data:
543+            return 0
544+
545+        read_size = len(data)
546+        self._received += read_size
547+
548+        if self._partial:
549+            data = self._partial + data
550+
551+        start = 0
552+        stop = len(data)
553+
554+        while start < stop:
555+            boundary, end, next = self._find_boundary(data, start, stop)
556+
557+            if not boundary and read_size:
558+                # make sure we don't treat a partial boundary (and its separators) as data
559+                stop -= len(self._boundary) + 16
560+                end = next = stop
561+                if end <= start:
562+                    break # need more data
563+
564+            if self._state == 'PREAMBLE':
565+                # Preamble, just ignore it
566+                self._state = 'HEADER'
567+
568+            elif self._state == 'HEADER':
569+                # Beginning of header, look for end of header and parse it if found.
570+
571+                header_end = data.find('\r\n\r\n', start, stop)
572+                if header_end == -1:
573+                    break # need more data
574+
575+                header = data[start:header_end]
576+
577+                self._fieldname = None
578+                self._filename = None
579+                self._content_type = None
580+
581+                for line in header.split('\r\n'):
582+                    ctype, opts = self.parse_header(line)
583+                    if ctype == 'content-disposition: form-data':
584+                        self._fieldname = opts.get('name')
585+                        self._filename = opts.get('filename')
586+                    elif ctype.startswith('content-type: '):
587+                        self._content_type = ctype[14:]
588+
589+                if self._filename is not None:
590+                    # cleanup filename from IE full paths:
591+                    self._filename = self._filename[self._filename.rfind("\\")+1:].strip()
592+
593+                    if self._filename: # ignore files without filenames
594+                        if self._file_upload_dir:
595+                            try:
596+                                self._file = self.TemporaryFile(dir=self._file_upload_dir)
597+                            except:
598+                                raise MultiPartParserError("Failed to create temporary file.")
599+                        else:
600+                            self._file = StringIO()
601+                    else:
602+                        self._file = None
603+                    self._filesize = 0
604+                    self._state = 'FILE'
605+                else:
606+                    self._field = StringIO()
607+                    self._state = 'FIELD'
608+                next = header_end + 4
609+
610+            elif self._state == 'FIELD':
611+                # In a field, collect data until a boundary is found.
612+
613+                self._field.write(data[start:end])
614+                if boundary:
615+                    if self._fieldname:
616+                        self._post.appendlist(self._fieldname, self._field.getvalue())
617+                    self._field.close()
618+                    self._state = 'HEADER'
619+
620+            elif self._state == 'FILE':
621+                # In a file, collect data until a boundary is found.
622+
623+                if self._file:
624+                    try:
625+                        self._file.write(data[start:end])
626+                    except IOError, e:
627+                        raise MultiPartParserError("Failed to write to temporary file.")
628+                    self._filesize += end-start
629+
630+                    if self._progress_filename:
631+                        self._request.file_progress = {'received': self._received,
632+                                                       'size':     self._size,
633+                                                       'state':    'uploading'}
634+
635+                if boundary:
636+                    if self._file:
637+                        if self._file_upload_dir:
638+                            self._file.seek(0)
639+                            file = self.LazyContent({
640+                                'filename': self._filename,
641+                                'content-type':  self._content_type,
642+                                # 'content': is read on demand
643+                                'content-length': self._filesize,
644+                                'tmpfilename': self._file.name,
645+                                'tmpfile': self._file
646+                            })
647+                        else:
648+                            file = {
649+                                'filename': self._filename,
650+                                'content-type':  self._content_type,
651+                                'content': self._file.getvalue(),
652+                                'content-length': self._filesize
653+                            }
654+                            self._file.close()
655+
656+                        self._files.appendlist(self._fieldname, file)
657+
658+                    self._state = 'HEADER'
659+
660+            start = next
661+
662+        self._partial = data[start:]
663+
664+        return read_size
665+
666+    def parse_header(self, line):
667+        from cgi import parse_header
668+        return parse_header(line)
669+
670 class QueryDict(MultiValueDict):
671     """A specialized MultiValueDict that takes a query string when initialized.
672     This is immutable unless you create a copy of it."""
673Index: django/http/file_descriptor.py
674===================================================================
675--- django/http/file_descriptor.py      (revision 0)
676+++ django/http/file_descriptor.py      (revision 0)
677@@ -0,0 +1,65 @@
678+"""
679+This file contains a fallback FileProgressDescriptor
680+for file upload progress.
681+"""
682+import pickle
683+import os
684+
685+class DefaultFileProgressDescriptor(object):
686+    def __init__(self, FileException):
687+        from django.conf import settings
688+
689+        self.FileException = FileException
690+        self.file_upload_dir = settings.FILE_UPLOAD_DIR
691+
692+    def __get__(self, request, HttpRequest):
693+        """
694+        Returns the file progress for this request or an empty dictionary if
695+        the file progress is not known. The result is cached.
696+        """
697+        progress_id = request.META.get('UPLOAD_PROGRESS_ID')
698+
699+        if not progress_id or not self.file_upload_dir:
700+            return {}
701+
702+        if getattr(self, '_file_progress', None) is not None:  # NOTE(review): cache is stored on the descriptor, not on the request -- confirm it cannot leak between requests
703+            return self._file_progress
704+
705+        try:
706+            f = open(os.path.join(self.file_upload_dir, progress_id), 'rb')  # NOTE(review): progress_id comes from request.META -- confirm it is validated before use as a file name
707+            progress = pickle.load(f)
708+            f.close()
709+        except:
710+            progress = {}
711+
712+        self._file_progress = progress
713+        return progress
714+
715+    def __set__(self, request, new_progress):
716+        """
717+        Sets the value of the file progress for this request.
718+        If no file progress is underway, raises an error.
719+        """
720+        progress_id = request.META.get('UPLOAD_PROGRESS_ID')
721+
722+        if not progress_id or not self.file_upload_dir:
723+            raise self.FileException('There is no upload in progress.')
724+
725+        self._file_progress = new_progress
726+        f = open(os.path.join(self.file_upload_dir, progress_id), 'wb')
727+        pickle.dump(new_progress, f)
728+        f.close()
729+
730+    def __delete__(self, request):
731+        """
732+        Removes the file if there is an upload in progress.
733+        """
734+        progress_id = request.META.get('UPLOAD_PROGRESS_ID')
735+
736+        if not progress_id or not self.file_upload_dir:
737+            raise self.FileException('There is no upload in progress.')
738+
739+        try:
740+            os.remove(os.path.join(self.file_upload_dir, progress_id))
741+        except:
742+            pass
743Index: django/oldforms/__init__.py
744===================================================================
745--- django/oldforms/__init__.py (revision 5127)
746+++ django/oldforms/__init__.py (working copy)
747@@ -666,17 +666,22 @@
748         self.validator_list = [self.isNonEmptyFile] + validator_list
749 
750     def isNonEmptyFile(self, field_data, all_data):
751-        try:
752-            content = field_data['content']
753-        except TypeError:
754+        if field_data.has_key('_file_upload_error'):
755+            raise validators.CriticalValidationError, field_data['_file_upload_error']
756+        if not field_data.has_key('filename'):
757             raise validators.CriticalValidationError, gettext("No file was submitted. Check the encoding type on the form.")
758-        if not content:
759+        if not field_data['content-length']:
760             raise validators.CriticalValidationError, gettext("The submitted file is empty.")
761 
762     def render(self, data):
763         return '<input type="file" id="%s" class="v%s" name="%s" />' % \
764             (self.get_id(), self.__class__.__name__, self.field_name)
765 
766+    def prepare(self, new_data):
767+        if new_data.has_key('_file_upload_error'):
768+            # pretend we got something in the field to raise a validation error later
769+            new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] }
770+
771     def html2python(data):
772         if data is None:
773             raise EmptyValue
774Index: django/utils/file.py
775===================================================================
776--- django/utils/file.py        (revision 0)
777+++ django/utils/file.py        (revision 0)
778@@ -0,0 +1,36 @@
779+import os
780+
781+try:
782+    import shutil
783+    file_move = shutil.move
784+except (ImportError, AttributeError):
785+    file_move = os.rename
786+
787+def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64):
788+    """
789+    Moves a file from one location to another in the safest way possible.
790+   
791+    First, it tries shutil.move, which is OS-dependent but doesn't break
792+    with a change of filesystems (os.rename, which does break across
793+    filesystems, is used only if shutil.move is unavailable). If that
794+    fails, it streams the file manually from one file to another in Python.
795+    """
796+   
797+    try:
798+        file_move(old_file_name, new_file_name)
799+        return
800+    except:
801+        pass
802+   
803+    new_file = open(new_file_name, 'wb')
804+    old_file = open(old_file_name, 'rb')
805+    current_chunk = None
806+   
807+    while current_chunk != '':
808+        current_chunk = old_file.read(chunk_size)
809+        new_file.write(current_chunk)
810+       
811+    new_file.close()
812+    old_file.close()
813+
814+    os.remove(old_file_name)
815Index: docs/forms.txt
816===================================================================
817--- docs/forms.txt      (revision 5127)
818+++ docs/forms.txt      (working copy)
819@@ -475,6 +475,19 @@
820    new_data = request.POST.copy()
821    new_data.update(request.FILES)
822 
823+Streaming file uploads.
824+-----------------------
825+
826+File uploads are read into memory by default. This works fine for
827+small to medium-sized uploads (from 1MB to 100MB, depending on your
828+setup and usage). If you want to support larger uploads, you can enable
829+upload streaming, where only a small part of the file is in memory
830+at any time. To do this, you need to specify the ``FILE_UPLOAD_DIR``
831+setting (see the settings_ document for more details).
832+
833+See `request object`_ for more details about ``request.FILES`` objects
834+with streaming file uploads enabled.
835+
836 Validators
837 ==========
838 
839@@ -697,3 +710,4 @@
840 .. _`generic views`: ../generic_views/
841 .. _`models API`: ../model-api/
842 .. _settings: ../settings/
843+.. _request object: ../request_response/#httprequest-objects
844Index: docs/request_response.txt
845===================================================================
846--- docs/request_response.txt   (revision 5127)
847+++ docs/request_response.txt   (working copy)
848@@ -72,13 +72,25 @@
849 ``FILES``
850     A dictionary-like object containing all uploaded files. Each key in
851     ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
852-    value in ``FILES`` is a standard Python dictionary with the following three
853+    value in ``FILES`` is a standard Python dictionary with the following four
854     keys:
855 
856         * ``filename`` -- The name of the uploaded file, as a Python string.
857         * ``content-type`` -- The content type of the uploaded file.
858         * ``content`` -- The raw content of the uploaded file.
859+        * ``content-length`` -- The length of the content in bytes.
860 
861+    If streaming file uploads are enabled, two additional keys
862+    describing the uploaded file will be present:
863+
864+       * ``tmpfilename`` -- The filename for the temporary file.
865+       * ``tmpfile`` -- An open file object for the temporary file.
866+
867+    The temporary file will be removed when the request finishes.
868+
869+    Note that accessing ``content`` when streaming uploads are enabled
870+    will read the whole file into memory, which may not be what you want.
871+
872     Note that ``FILES`` will only contain data if the request method was POST
873     and the ``<form>`` that posted to the request had
874     ``enctype="multipart/form-data"``. Otherwise, ``FILES`` will be a blank
875Index: docs/settings.txt
876===================================================================
877--- docs/settings.txt   (revision 5127)
878+++ docs/settings.txt   (working copy)
879@@ -439,6 +439,15 @@
880 
881 .. _Testing Django Applications: ../testing/
882 
883+FILE_UPLOAD_DIR
884+---------------
885+
886+Default: ``None``
887+
888+Path to a directory where temporary files should be written during
889+file uploads. Leaving this as ``None`` will disable streaming file uploads,
890+and cause all uploaded files to be stored (temporarily) in memory.
891+
892 IGNORABLE_404_ENDS
893 ------------------
894 
895@@ -782,6 +791,16 @@
896 
897 .. _site framework docs: ../sites/
898 
899+STREAMING_MIN_POST_SIZE
900+-----------------------
901+
902+Default: 524288 (``512*1024``)
903+
904+An integer specifying the minimum number of bytes that have to be
905+received (in a POST) for file upload streaming to take place. Any
906+request smaller than this will be handled in memory.
907+Note: ``FILE_UPLOAD_DIR`` has to be defined to enable streaming.
908+
909 TEMPLATE_CONTEXT_PROCESSORS
910 ---------------------------
911 
912Index: tests/modeltests/test_client/models.py
913===================================================================
914--- tests/modeltests/test_client/models.py      (revision 5127)
915+++ tests/modeltests/test_client/models.py      (working copy)
916@@ -3,7 +3,7 @@
917 
918 The test client is a class that can act like a simple
919 browser for testing purposes.
920
921+
922 It allows the user to compose GET and POST requests, and
923 obtain the response that the server gave to those requests.
924 The server Response objects are annotated with the details
925@@ -75,6 +75,20 @@
926         self.assertEqual(response.template.name, "Book template")
927         self.assertEqual(response.content, "Blink - Malcolm Gladwell")
928 
929+    def test_post_file_view(self):
930+        "POST this python file to a view"
931+        import os, tempfile
932+        from django.conf import settings
933+        file = __file__.replace('.pyc', '.py')
934+        for upload_dir in [None, tempfile.gettempdir()]:
935+            settings.FILE_UPLOAD_DIR = upload_dir
936+            post_data = { 'name': file, 'file': open(file) }
937+            response = self.client.post('/test_client/post_file_view/', post_data)
938+            self.failUnless('models.py' in response.context['file']['filename'])
939+            self.failUnless(len(response.context['file']['content']) == os.path.getsize(file))
940+            if upload_dir:
941+                self.failUnless(response.context['file']['tmpfilename'])
942+
943     def test_redirect(self):
944         "GET a URL that redirects elsewhere"
945         response = self.client.get('/test_client/redirect_view/')
946Index: tests/modeltests/test_client/urls.py
947===================================================================
948--- tests/modeltests/test_client/urls.py        (revision 5127)
949+++ tests/modeltests/test_client/urls.py        (working copy)
950@@ -4,6 +4,7 @@
951 urlpatterns = patterns('',
952     (r'^get_view/$', views.get_view),
953     (r'^post_view/$', views.post_view),
954+    (r'^post_file_view/$', views.post_file_view),
955     (r'^raw_post_view/$', views.raw_post_view),
956     (r'^redirect_view/$', views.redirect_view),
957     (r'^form_view/$', views.form_view),
958Index: tests/modeltests/test_client/views.py
959===================================================================
960--- tests/modeltests/test_client/views.py       (revision 5127)
961+++ tests/modeltests/test_client/views.py       (working copy)
962@@ -44,6 +44,12 @@
963 
964     return HttpResponse(t.render(c))
965 
966+def post_file_view(request):
967+    "A view that expects a multipart post and returns a file in the context"
968+    t = Template('File {{ file.filename }} received', name='POST Template')
969+    c = Context({'file': request.FILES['file_file']})
970+    return HttpResponse(t.render(c))
971+
972 def redirect_view(request):
973     "A view that redirects all requests to the GET view"
974     return HttpResponseRedirect('/test_client/get_view/')