Code

Ticket #2070: 5116_streaming_upload_fixed_middleware_append_2.diff

File 5116_streaming_upload_fixed_middleware_append_2.diff, 42.4 KB (added by Michael Axiak <axiak@…>, 7 years ago)

Fixed a bug where .append(0, func) was called; this time the files are included.

Line 
1Index: django/http/file_descriptor.py
2===================================================================
3--- django/http/file_descriptor.py      (revision 0)
4+++ django/http/file_descriptor.py      (revision 0)
5@@ -0,0 +1,151 @@
6+"""
7+This file contains a fallback FileProgressDescriptor
8+for file upload progress.
9+"""
10+import pickle
11+import os
12+
13+class DefaultFileProgressDescriptor(object):
14+
15+    def __init__(self, FileException):
16+        self.FileException = FileException
17+   
18+    def __get__(self, request, HttpRequest):
19+        """
20+        Returns the file progress for this request.
21+        If no file progress is known, returns an empty
22+        dictionary.
23+        The request also keeps a local copy so that
24+        the file is not accessed every time one wants to
25+        ask for something.
26+        """
27+        from django.conf import settings
28+
29+        file_upload_dir = settings.FILE_UPLOAD_DIR
30+        progress_id     = request.META['UPLOAD_PROGRESS_ID']
31+       
32+        if not progress_id or not file_upload_dir:
33+            return {}
34+
35+        if getattr(self, '_file_progress', False) != False:
36+            return self._file_progress
37+
38+        try:
39+            f = open(os.path.join(file_upload_dir, progress_id), 'rb')
40+            progress = pickle.load(f)
41+            f.close()
42+            self._file_progress = progress
43+            return progress
44+        except:
45+            self._file_progress = {}
46+            return {}
47+
48+    def __set__(self, request, new_progress):
49+        """
50+        Sets the value of the file progress for this request.
51+        If no file progress is underway, raises an error.
52+        """
53+
54+        from django.conf import settings
55+
56+        file_upload_dir = settings.FILE_UPLOAD_DIR
57+        progress_id     = request.META['UPLOAD_PROGRESS_ID']
58+
59+        if not progress_id or not file_upload_dir:
60+            raise self.FileException('There is no upload in progress.')
61+
62+        self._file_progress = new_progress
63+        f = open(os.path.join(file_upload_dir, progress_id), 'wb')
64+        pickle.dump(new_progress, f)
65+        f.close()
66+
67+    def __delete__(self, request):
68+        """
69+        Removes the file if there is an upload in progress.
70+        """
71+        file_upload_dir = settings.FILE_UPLOAD_DIR
72+        progress_id     = request.META['UPLOAD_PROGRESS_ID']
73+
74+        if not progress_id or not file_upload_dir:
75+            raise self.FileException('There is no upload in progress.')
76+
77+        try:
78+            os.remove(os.path.join(file_upload_dir, progress_id))
79+        except:
80+            pass
81+"""
82+This file contains a fallback FileProgressDescriptor
83+for file upload progress.
84+"""
85+import pickle
86+import os
87+
88+class DefaultFileProgressDescriptor(object):
89+
90+    def __init__(self, FileException):
91+        self.FileException = FileException
92+   
93+    def __get__(self, request, HttpRequest):
94+        """
95+        Returns the file progress for this request.
96+        If no file progress is known, returns an empty
97+        dictionary.
98+        The request also keeps a local copy so that
99+        the file is not accessed every time one wants to
100+        ask for something.
101+        """
102+        from django.conf import settings
103+
104+        file_upload_dir = settings.FILE_UPLOAD_DIR
105+        progress_id     = request.META['UPLOAD_PROGRESS_ID']
106+       
107+        if not progress_id or not file_upload_dir:
108+            return {'wow': 'true'}
109+            return {}
110+
111+        if getattr(self, '_file_progress', False) != False:
112+            return self._file_progress
113+
114+        try:
115+            f = open(os.path.join(file_upload_dir, progress_id), 'rb')
116+            progress = pickle.load(f)
117+            f.close()
118+            self._file_progress = progress
119+            return progress
120+        except:
121+            self._file_progress = {}
122+            return {}
123+
124+    def __set__(self, request, new_progress):
125+        """
126+        Sets the value of the file progress for this request.
127+        If no file progress is underway, raises an error.
128+        """
129+
130+        from django.conf import settings
131+
132+        file_upload_dir = settings.FILE_UPLOAD_DIR
133+        progress_id     = request.META['UPLOAD_PROGRESS_ID']
134+
135+        if not progress_id or not file_upload_dir:
136+            raise self.FileException('There is no upload in progress.')
137+
138+        self._file_progress = new_progress
139+        f = open(os.path.join(file_upload_dir, progress_id), 'wb')
140+        pickle.dump(new_progress, f)
141+        f.close()
142+
143+    def __delete__(self, request):
144+        """
145+        Removes the file if there is an upload in progress.
146+        """
147+        file_upload_dir = settings.FILE_UPLOAD_DIR
148+        progress_id     = request.META['UPLOAD_PROGRESS_ID']
149+
150+        if not progress_id or not file_upload_dir:
151+            raise self.FileException('There is no upload in progress.')
152+
153+        try:
154+            os.remove(os.path.join(file_upload_dir, progress_id))
155+        except:
156+            pass
157Index: django/http/__init__.py
158===================================================================
159--- django/http/__init__.py     (revision 5116)
160+++ django/http/__init__.py     (working copy)
161@@ -1,22 +1,48 @@
162-import os
163+import os, pickle
164 from Cookie import SimpleCookie
165 from pprint import pformat
166 from urllib import urlencode, quote
167 from django.utils.datastructures import MultiValueDict
168+from django.http.file_descriptor import DefaultFileProgressDescriptor
169+import re
170 
171+try:
172+    from cStringIO import StringIO
173+except ImportError:
174+    from StringIO import StringIO
175+
176 RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
177 
178+
179 try:
180     # The mod_python version is more efficient, so try importing it first.
181     from mod_python.util import parse_qsl
182 except ImportError:
183     from cgi import parse_qsl
184 
185+class MetaFileProgressDescriptor(object):
186+    """
187+    This descriptor allows other descriptors to
188+    be loaded in runtime to a request instance.
189+    """
190+    def __get__(self, request, *args, **kwargs):
191+        return request._file_progress.__get__(request, *args, **kwargs)
192+
193+    def __set__(self, request, *args, **kwargs):
194+        return request._file_progress.__set__(request, *args, **kwargs)
195+
196+    def __delete__(self, request, *args, **kwargs):
197+        return request._file_progress.__delete__(request, *args, **kwargs)
198+
199 class Http404(Exception):
200     pass
201 
202 class HttpRequest(object):
203     "A basic HTTP request"
204+
205+    upload_id_re = re.compile(r'^[a-fA-F0-9]{32}$')
206+    file_progress = MetaFileProgressDescriptor()
207+
208     def __init__(self):
209         self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {}
210         self.path = ''
211@@ -41,38 +67,332 @@
212 
213     def is_secure(self):
214         return os.environ.get("HTTPS") == "on"
215+
216+    def _get_file_progress_from_args(self, headers, get, querystring):
217 
218-def parse_file_upload(header_dict, post_data):
219-    "Returns a tuple of (POST MultiValueDict, FILES MultiValueDict)"
220-    import email, email.Message
221-    from cgi import parse_header
222-    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
223-    raw_message += '\r\n\r\n' + post_data
224-    msg = email.message_from_string(raw_message)
225-    POST = MultiValueDict()
226-    FILES = MultiValueDict()
227-    for submessage in msg.get_payload():
228-        if submessage and isinstance(submessage, email.Message.Message):
229-            name_dict = parse_header(submessage['Content-Disposition'])[1]
230-            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
231-            # or {'name': 'blah'} for POST fields
232-            # We assume all uploaded files have a 'filename' set.
233-            if 'filename' in name_dict:
234-                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
235-                if not name_dict['filename'].strip():
236-                    continue
237-                # IE submits the full path, so trim everything but the basename.
238-                # (We can't use os.path.basename because it expects Linux paths.)
239-                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
240-                FILES.appendlist(name_dict['name'], {
241-                    'filename': filename,
242-                    'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
243-                    'content': submessage.get_payload(),
244-                })
245-            else:
246-                POST.appendlist(name_dict['name'], submessage.get_payload())
247-    return POST, FILES
248+        if 'X-Upload-ID' in headers:
249+            progress_id = headers['X-Upload-ID']
250+        elif 'X-Progress-ID' in headers:
251+            progress_id = headers['X-Progress-ID']
252+        elif 'upload_id' in get:
253+            progress_id = get['upload_id']
254+        elif 'progress_id' in get:
255+            progress_id = get['progress_id']
256+        elif querystring != None and len(querystring.strip()) == 32:
257+            progress_id = querystring
258+        else:
259+            return None
260 
261+        if not self.upload_id_re.match(progress_id):
262+            return None
263+
264+        return progress_id
265+
266+
267+def parse_file_upload(headers, input, request):
268+    from django.conf import settings
269+
270+    # Only stream files to disk if FILE_UPLOAD_DIR is set
271+    file_upload_dir = settings.FILE_UPLOAD_DIR
272+    streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE
273+
274+    try:
275+        parser = MultiPartParser(headers, input, request, file_upload_dir, streaming_min_post_size)
276+        return parser.parse()
277+    except MultiPartParserError, e:
278+        return MultiValueDict({ '_file_upload_error': [e.message] }), {}
279+
280+class MultiPartParserError(Exception):
281+    def __init__(self, message):
282+        self.message = message
283+    def __str__(self):
284+        return repr(self.message)
285+       
286+class MultiPartParser(object):
287+    """
288+    An RFC 2388 multipart/form-data parser.
289+   
290+    parse() reads the input stream in chunk_size chunks and returns a
291+    tuple of (POST MultiValueDict, FILES MultiValueDict). If
292+    file_upload_dir is defined files will be streamed to temporary
293+    files in the specified directory.
294+
295+    The FILES dictionary will have 'filename', 'content-type',
296+    'content' and 'content-length' entries. For streamed files it will
297+    also have 'tmpfilename' and 'tmpfile'. The 'content' entry will
298+    only be read from disk when referenced for streamed files.
299+
300+    If the header X-Progress-ID is sent with a 32 character hex string
301+    a temporary file with the same name will be created in
302+    ``file_upload_dir`` with a pickled { 'received', 'size' }
303+    dictionary with the number of bytes received and the size expected
304+    respectively. The file will be unlinked when the parser finishes.
305+
306+    """
307+
308+    def __init__(self, headers, input, request, file_upload_dir=None, streaming_min_post_size=None, chunk_size=1024*64):
309+        try:
310+            content_length = int(headers['Content-Length'])
311+        except:
312+            raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length'))
313+
314+        content_type = headers.get('Content-Type')
315+
316+        if not content_type or not content_type.startswith('multipart/'):
317+            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
318+           
319+        ctype, opts = self.parse_header(content_type)
320+        boundary = opts.get('boundary')
321+        from cgi import valid_boundary
322+        if not boundary or not valid_boundary(boundary):
323+            raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary)
324+
325+        progress_id = request.META['UPLOAD_PROGRESS_ID']
326+
327+        if file_upload_dir and progress_id:
328+            self._progress_filename = os.path.join(file_upload_dir, progress_id)
329+        else:
330+            self._progress_filename = None
331+        self._boundary = '--' + boundary
332+        self._input = input
333+        self._size = content_length
334+        self._received = 0
335+        self._file_upload_dir = file_upload_dir
336+        self._chunk_size = chunk_size
337+        self._state = 'PREAMBLE'
338+        self._partial = ''
339+        self._post = MultiValueDict()
340+        self._files = MultiValueDict()
341+        self._request = request
342+       
343+        if streaming_min_post_size is not None and content_length < streaming_min_post_size:
344+            self._file_upload_dir = None # disable file streaming for small request
345+
346+        try:
347+            # use mx fast string search if available
348+            from mx.TextTools import FS
349+            self._fs = FS(self._boundary)
350+        except ImportError:
351+            self._fs = None
352+
353+    def parse(self):
354+        try:
355+            self._parse()
356+        finally:
357+            if self._progress_filename:
358+                self._request.file_progress = {'state': 'done'}
359+               
360+       
361+        return self._post, self._files
362+
363+    def _parse(self):
364+        size = self._size
365+
366+        try:
367+            while size > 0:
368+                n = self._read(self._input, min(self._chunk_size, size))
369+                if not n:
370+                    break
371+                size -= n
372+        except:
373+            # consume any remaining data so we don't generate a "Connection Reset" error
374+            size = self._size - self._received
375+            while size > 0:
376+                data = self._input.read(min(self._chunk_size, size))
377+                size -= len(data)
378+            raise
379+
380+    def _find_boundary(self, data, start, stop):
381+        """
382+        Find the next boundary and return the end of current part
383+        and start of next part.
384+        """
385+        if self._fs:
386+            boundary = self._fs.find(data, start, stop)
387+        else:
388+            boundary = data.find(self._boundary, start, stop)
389+        if boundary >= 0:
390+            end = boundary
391+            next = boundary + len(self._boundary)
392+
393+            # backup over CRLF
394+            if end > 0 and data[end-1] == '\n': end -= 1
395+            if end > 0 and data[end-1] == '\r': end -= 1
396+            # skip over --CRLF
397+            if next < stop and data[next] == '-': next += 1
398+            if next < stop and data[next] == '-': next += 1
399+            if next < stop and data[next] == '\r': next += 1
400+            if next < stop and data[next] == '\n': next += 1
401+
402+            return True, end, next
403+        else:
404+            return False, stop, stop
405+
406+    class TemporaryFile(object):
407+        "A temporary file that tries to delete itself when garbage collected."
408+        def __init__(self, dir):
409+            import tempfile
410+            (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
411+            self.file = os.fdopen(fd, 'w+b')
412+            self.name = name
413+
414+        def __getattr__(self, name):
415+            a = getattr(self.__dict__['file'], name)
416+            if type(a) != type(0):
417+                setattr(self, name, a)
418+            return a
419+
420+        def __del__(self):
421+            try:
422+                os.unlink(self.name)
423+            except OSError:
424+                pass
425+           
426+    class LazyContent(dict):
427+        """
428+        A lazy FILES dictionary entry that reads the contents from
429+        tmpfile only when referenced.
430+        """
431+        def __init__(self, data):
432+            dict.__init__(self, data)
433+       
434+        def __getitem__(self, key):
435+            if key == 'content' and not self.has_key(key):
436+                self['tmpfile'].seek(0)
437+                self['content'] = self['tmpfile'].read()
438+            return dict.__getitem__(self, key)
439+
440+    def _read(self, input, size):
441+        data = input.read(size)
442+
443+        if not data:
444+            return 0
445+
446+        read_size = len(data)
447+        self._received += read_size
448+
449+        if self._partial:
450+            data = self._partial + data
451+
452+        start = 0
453+        stop = len(data)
454+       
455+        while start < stop:
456+            boundary, end, next = self._find_boundary(data, start, stop)
457+
458+            if not boundary and read_size:
459+                # make sure we don't treat a partial boundary (and its separators) as data
460+                stop -= len(self._boundary) + 16
461+                end = next = stop
462+                if end <= start:
463+                    break # need more data
464+
465+            if self._state == 'PREAMBLE':
466+                # Preamble, just ignore it
467+                self._state = 'HEADER'
468+
469+            elif self._state == 'HEADER':
470+                # Beginning of header, look for end of header and parse it if found.
471+
472+                header_end = data.find('\r\n\r\n', start, stop)
473+                if header_end == -1:
474+                    break # need more data
475+
476+                header = data[start:header_end]
477+
478+                self._fieldname = None
479+                self._filename = None
480+                self._content_type = None
481+
482+                for line in header.split('\r\n'):
483+                    ctype, opts = self.parse_header(line)
484+                    if ctype == 'content-disposition: form-data':
485+                        self._fieldname = opts.get('name')
486+                        self._filename = opts.get('filename')
487+                    elif ctype.startswith('content-type: '):
488+                        self._content_type = ctype[14:]
489+
490+                if self._filename is not None:
491+                    # cleanup filename from IE full paths:
492+                    self._filename = self._filename[self._filename.rfind("\\")+1:].strip()
493+
494+                    if self._filename: # ignore files without filenames
495+                        if self._file_upload_dir:
496+                            try:
497+                                self._file = self.TemporaryFile(dir=self._file_upload_dir)
498+                            except:
499+                                raise MultiPartParserError("Failed to create temporary file.")
500+                        else:
501+                            self._file = StringIO()
502+                    else:
503+                        self._file = None
504+                    self._filesize = 0
505+                    self._state = 'FILE'
506+                else:
507+                    self._field = StringIO()
508+                    self._state = 'FIELD'
509+                next = header_end + 4
510+
511+            elif self._state == 'FIELD':
512+                # In a field, collect data until a boundary is found.
513+
514+                self._field.write(data[start:end])
515+                if boundary:
516+                    if self._fieldname:
517+                        self._post.appendlist(self._fieldname, self._field.getvalue())
518+                    self._field.close()
519+                    self._state = 'HEADER'
520+
521+            elif self._state == 'FILE':
522+                # In a file, collect data until a boundary is found.
523+
524+                if self._file:
525+                    try:
526+                        self._file.write(data[start:end])
527+                    except IOError, e:
528+                        raise MultiPartParserError("Failed to write to temporary file.")
529+                    self._filesize += end-start
530+
531+                    if self._progress_filename:
532+                        self._request.file_progress = {'received': self._received,
533+                                                       'size':     self._size,
534+                                                       'state':    'uploading'}
535+
536+                if boundary:
537+                    if self._file:
538+                        if self._file_upload_dir:
539+                            self._file.seek(0)
540+                            file = self.LazyContent({
541+                                'filename': self._filename,
542+                                'content-type':  self._content_type,
543+                                # 'content': is read on demand
544+                                'content-length': self._filesize,
545+                                'tmpfilename': self._file.name,
546+                                'tmpfile': self._file
547+                            })
548+                        else:
549+                            file = {
550+                                'filename': self._filename,
551+                                'content-type':  self._content_type,
552+                                'content': self._file.getvalue(),
553+                                'content-length': self._filesize
554+                            }
555+                            self._file.close()
556+
557+                        self._files.appendlist(self._fieldname, file)
558+
559+                    self._state = 'HEADER'
560+
561+            start = next
562+               
563+        self._partial = data[start:]
564+
565+        return read_size
566+
567+    def parse_header(self, line):
568+        from cgi import parse_header
569+        return parse_header(line)
570+
571 class QueryDict(MultiValueDict):
572     """A specialized MultiValueDict that takes a query string when initialized.
573     This is immutable unless you create a copy of it."""
574@@ -306,3 +626,4 @@
575     if not host:
576         host = request.META.get('HTTP_HOST', '')
577     return host
578+
579Index: django/oldforms/__init__.py
580===================================================================
581--- django/oldforms/__init__.py (revision 5116)
582+++ django/oldforms/__init__.py (working copy)
583@@ -666,17 +666,22 @@
584         self.validator_list = [self.isNonEmptyFile] + validator_list
585 
586     def isNonEmptyFile(self, field_data, all_data):
587-        try:
588-            content = field_data['content']
589-        except TypeError:
590+        if field_data.has_key('_file_upload_error'):
591+            raise validators.CriticalValidationError, field_data['_file_upload_error']
592+        if not field_data.has_key('filename'):
593             raise validators.CriticalValidationError, gettext("No file was submitted. Check the encoding type on the form.")
594-        if not content:
595+        if not field_data['content-length']:
596             raise validators.CriticalValidationError, gettext("The submitted file is empty.")
597 
598     def render(self, data):
599         return '<input type="file" id="%s" class="v%s" name="%s" />' % \
600             (self.get_id(), self.__class__.__name__, self.field_name)
601 
602+    def prepare(self, new_data):
603+        if new_data.has_key('_file_upload_error'):
604+            # pretend we got something in the field to raise a validation error later
605+            new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] }
606+
607     def html2python(data):
608         if data is None:
609             raise EmptyValue
610Index: django/db/models/base.py
611===================================================================
612--- django/db/models/base.py    (revision 5116)
613+++ django/db/models/base.py    (working copy)
614@@ -12,12 +12,14 @@
615 from django.dispatch import dispatcher
616 from django.utils.datastructures import SortedDict
617 from django.utils.functional import curry
618+from django.utils.file import file_move_safe
619 from django.conf import settings
620 from itertools import izip
621 import types
622 import sys
623 import os
624 
625+               
626 class ModelBase(type):
627     "Metaclass for all models"
628     def __new__(cls, name, bases, attrs):
629@@ -361,7 +363,7 @@
630     def _get_FIELD_size(self, field):
631         return os.path.getsize(self._get_FIELD_filename(field))
632 
633-    def _save_FIELD_file(self, field, filename, raw_contents, save=True):
634+    def _save_FIELD_file(self, field, filename, raw_field, save=True):
635         directory = field.get_directory_name()
636         try: # Create the date-based directory if it doesn't exist.
637             os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
638@@ -383,9 +385,13 @@
639         setattr(self, field.attname, filename)
640 
641         full_filename = self._get_FIELD_filename(field)
642-        fp = open(full_filename, 'wb')
643-        fp.write(raw_contents)
644-        fp.close()
645+        if raw_field.has_key('tmpfilename'):
646+            raw_field['tmpfile'].close()
647+            file_move_safe(raw_field['tmpfilename'], full_filename)
648+        else:
649+            fp = open(full_filename, 'wb')
650+            fp.write(raw_field['content'])
651+            fp.close()
652 
653         # Save the width and/or height, if applicable.
654         if isinstance(field, ImageField) and (field.width_field or field.height_field):
655Index: django/db/models/fields/__init__.py
656===================================================================
657--- django/db/models/fields/__init__.py (revision 5116)
658+++ django/db/models/fields/__init__.py (working copy)
659@@ -636,7 +636,7 @@
660         setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
661         setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
662         setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
663-        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
664+        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
665         dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
666 
667     def delete_file(self, instance):
668@@ -659,9 +659,9 @@
669         if new_data.get(upload_field_name, False):
670             func = getattr(new_object, 'save_%s_file' % self.name)
671             if rel:
672-                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
673+                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save)
674             else:
675-                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
676+                func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save)
677 
678     def get_directory_name(self):
679         return os.path.normpath(datetime.datetime.now().strftime(self.upload_to))
680Index: django/conf/global_settings.py
681===================================================================
682--- django/conf/global_settings.py      (revision 5116)
683+++ django/conf/global_settings.py      (working copy)
684@@ -240,6 +240,20 @@
685 # isExistingURL validator.
686 URL_VALIDATOR_USER_AGENT = "Django/0.96pre (http://www.djangoproject.com)"
687 
688+# The directory to place streamed file uploads. The web server needs write
689+# permissions on this directory.
690+# If this is None, streaming uploads are disabled.
691+FILE_UPLOAD_DIR = None
692+
693+
694+# The minimum size of a POST before file uploads are streamed to disk.
695+# POSTs smaller than this are kept in memory instead of being streamed.
696+# Size is in bytes.
697+STREAMING_MIN_POST_SIZE = 512 * (2**10)
698+
699+
700+
701+
702 ##############
703 # MIDDLEWARE #
704 ##############
705@@ -335,3 +349,5 @@
706 
707 # The list of directories to search for fixtures
708 FIXTURE_DIRS = ()
709+
710+
711Index: django/core/handlers/wsgi.py
712===================================================================
713--- django/core/handlers/wsgi.py        (revision 5116)
714+++ django/core/handlers/wsgi.py        (working copy)
715@@ -75,6 +75,7 @@
716         self.environ = environ
717         self.path = environ['PATH_INFO']
718         self.META = environ
719+        self.META['UPLOAD_PROGRESS_ID'] = self._get_file_progress_id()
720         self.method = environ['REQUEST_METHOD'].upper()
721 
722     def __repr__(self):
723@@ -111,7 +112,14 @@
724             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
725                 header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
726                 header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
727-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
728+                header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '')
729+                header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '')
730+                try:
731+                    self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'], self)
732+                except:
733+                    self._post, self._files = {}, {} # make sure we dont read the input stream again
734+                    raise
735+                self._raw_post_data = None # raw data is not available for streamed multipart messages
736             else:
737                 self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
738         else:
739@@ -167,6 +175,17 @@
740             buf.close()
741             return self._raw_post_data
742 
743+    def _get_file_progress_id(self):
744+        """
745+        Returns the Progress ID of the request,
746+        usually provided if there is a file upload
747+        going on.
748+        Returns ``None`` if no progress ID is specified.
749+        """
750+        return self._get_file_progress_from_args(self.environ,
751+                                                 self.GET,
752+                                                 self._req.args)
753+
754     GET = property(_get_get, _set_get)
755     POST = property(_get_post, _set_post)
756     COOKIES = property(_get_cookies, _set_cookies)
757Index: django/core/handlers/base.py
758===================================================================
759--- django/core/handlers/base.py        (revision 5116)
760+++ django/core/handlers/base.py        (working copy)
761@@ -5,7 +5,7 @@
762 
763 class BaseHandler(object):
764     def __init__(self):
765-        self._request_middleware = self._view_middleware = self._response_middleware = self._exception_middleware = None
766+        self._upload_middleware = self._request_middleware = self._view_middleware = self._response_middleware = self._exception_middleware = None
767 
768     def load_middleware(self):
769         """
770@@ -19,6 +19,7 @@
771         self._view_middleware = []
772         self._response_middleware = []
773         self._exception_middleware = []
774+        self._upload_middleware = []
775         for middleware_path in settings.MIDDLEWARE_CLASSES:
776             try:
777                 dot = middleware_path.rindex('.')
778@@ -47,13 +48,29 @@
779                 self._response_middleware.insert(0, mw_instance.process_response)
780             if hasattr(mw_instance, 'process_exception'):
781                 self._exception_middleware.insert(0, mw_instance.process_exception)
782+            if hasattr(mw_instance, 'process_upload'):
783+                self._upload_middleware.append(mw_instance.process_upload)
784 
785+    def file_progress_descriptor(self, request):
786+        """
787+        Returns the descriptor that manages the file_progress: the first
788+        non-None ``process_upload`` middleware result, else the default one.
789+        """
790+        for mw_call in self._upload_middleware:
791+            result = mw_call(http.MultiPartParserError)
792+            if result is not None:
793+                return result
794+        return http.DefaultFileProgressDescriptor(http.MultiPartParserError)
795+       
796     def get_response(self, request):
797         "Returns an HttpResponse object for the given HttpRequest"
798         from django.core import exceptions, urlresolvers
799         from django.core.mail import mail_admins
800         from django.conf import settings
801 
802+        # Add file_progress descriptor
803+        request._file_progress = self.file_progress_descriptor(request)
804+
805         # Apply request middleware
806         for middleware_method in self._request_middleware:
807             response = middleware_method(request)
808Index: django/core/handlers/modpython.py
809===================================================================
810--- django/core/handlers/modpython.py   (revision 5116)
811+++ django/core/handlers/modpython.py   (working copy)
812@@ -47,7 +47,12 @@
813     def _load_post_and_files(self):
814         "Populates self._post and self._files"
815         if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
816-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
817+            self._raw_post_data = None # raw data is not available for streamed multipart messages
818+            try:
819+                self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req, self)
820+            except:
821+                self._post, self._files = {}, {} # make sure we dont read the input stream again
822+                raise
823         else:
824             self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
825 
826@@ -92,20 +97,21 @@
827                 'AUTH_TYPE':         self._req.ap_auth_type,
828                 'CONTENT_LENGTH':    self._req.clength, # This may be wrong
829                 'CONTENT_TYPE':      self._req.content_type, # This may be wrong
830-                'GATEWAY_INTERFACE': 'CGI/1.1',
831-                'PATH_INFO':         self._req.path_info,
832-                'PATH_TRANSLATED':   None, # Not supported
833-                'QUERY_STRING':      self._req.args,
834-                'REMOTE_ADDR':       self._req.connection.remote_ip,
835-                'REMOTE_HOST':       None, # DNS lookups not supported
836-                'REMOTE_IDENT':      self._req.connection.remote_logname,
837-                'REMOTE_USER':       self._req.user,
838-                'REQUEST_METHOD':    self._req.method,
839-                'SCRIPT_NAME':       None, # Not supported
840-                'SERVER_NAME':       self._req.server.server_hostname,
841-                'SERVER_PORT':       self._req.server.port,
842-                'SERVER_PROTOCOL':   self._req.protocol,
843-                'SERVER_SOFTWARE':   'mod_python'
844+                'GATEWAY_INTERFACE':  'CGI/1.1',
845+                'PATH_INFO':          self._req.path_info,
846+                'PATH_TRANSLATED':    None, # Not supported
847+                'QUERY_STRING':       self._req.args,
848+                'REMOTE_ADDR':        self._req.connection.remote_ip,
849+                'REMOTE_HOST':        None, # DNS lookups not supported
850+                'REMOTE_IDENT':       self._req.connection.remote_logname,
851+                'REMOTE_USER':        self._req.user,
852+                'REQUEST_METHOD':     self._req.method,
853+                'SCRIPT_NAME':        None, # Not supported
854+                'SERVER_NAME':        self._req.server.server_hostname,
855+                'SERVER_PORT':        self._req.server.port,
856+                'SERVER_PROTOCOL':    self._req.protocol,
857+                'UPLOAD_PROGRESS_ID': self._get_file_progress_id(),
858+                'SERVER_SOFTWARE':    'mod_python'
859             }
860             for key, value in self._req.headers_in.items():
861                 key = 'HTTP_' + key.upper().replace('-', '_')
862@@ -122,6 +128,18 @@
863     def _get_method(self):
864         return self.META['REQUEST_METHOD'].upper()
865 
866+    def _get_file_progress_id(self):
867+        """
868+        Looks up the upload Progress ID for this request from the
869+        mod_python request headers, the parsed GET data and the raw
870+        query string. Returns ``None`` if no progress ID is specified.
871+        """
872+        mp_request = self._req
873+        return self._get_file_progress_from_args(mp_request.headers_in,
874+                                                 self.GET,
875+                                                 mp_request.args)
876+
877+
878     GET = property(_get_get, _set_get)
879     POST = property(_get_post, _set_post)
880     COOKIES = property(_get_cookies, _set_cookies)
881Index: django/utils/file.py
882===================================================================
883--- django/utils/file.py        (revision 0)
884+++ django/utils/file.py        (revision 0)
885@@ -0,0 +1,72 @@
886+import os
887+
888+try:
889+    import shutil
890+    file_move = shutil.move
891+except (ImportError, AttributeError):
892+    file_move = os.rename
893+
894+def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64):
895+    """
896+    Moves a file from one location to another in the safest way possible.
897+
898+    First, it tries shutil.move, which copes with a change of filesystems
899+    (or os.rename, which does not, when shutil.move is unavailable). If
900+    that fails with an OS-level error, the file is streamed manually in
901+    ``chunk_size``-byte chunks and the original is removed afterwards.
902+    """
903+    try:
904+        file_move(old_file_name, new_file_name)
905+        return
906+    except EnvironmentError:
907+        pass # fall through to the manual copy below
908+
909+    old_file = open(old_file_name, 'rb') # open the source first: fail before truncating the target
910+    try:
911+        new_file = open(new_file_name, 'wb')
912+        try:
913+            current_chunk = old_file.read(chunk_size)
914+            while current_chunk:
915+                new_file.write(current_chunk)
916+                current_chunk = old_file.read(chunk_size)
917+        finally:
918+            new_file.close()
919+    finally:
920+        old_file.close()
921+    os.remove(old_file_name)
922+import os
923+
924+try:
925+    import shutil
926+    file_move = shutil.move
927+except (ImportError, AttributeError):
928+    file_move = os.rename
929+
930+def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64):
931+    """
932+    Moves a file from one location to another in the safest way possible.
933+
934+    First, it tries shutil.move, which copes with a change of filesystems
935+    (or os.rename, which does not, when shutil.move is unavailable). If
936+    that fails with an OS-level error, the file is streamed manually in
937+    ``chunk_size``-byte chunks and the original is removed afterwards.
938+    """
939+    try:
940+        file_move(old_file_name, new_file_name)
941+        return
942+    except EnvironmentError:
943+        pass # fall through to the manual copy below
944+
945+    old_file = open(old_file_name, 'rb') # open the source first: fail before truncating the target
946+    try:
947+        new_file = open(new_file_name, 'wb')
948+        try:
949+            current_chunk = old_file.read(chunk_size)
950+            while current_chunk:
951+                new_file.write(current_chunk)
952+                current_chunk = old_file.read(chunk_size)
953+        finally:
954+            new_file.close()
955+    finally:
956+        old_file.close()
957+    os.remove(old_file_name)
958Index: tests/modeltests/test_client/views.py
959===================================================================
960--- tests/modeltests/test_client/views.py       (revision 5116)
961+++ tests/modeltests/test_client/views.py       (working copy)
962@@ -44,6 +44,12 @@
963 
964     return HttpResponse(t.render(c))
965 
966+def post_file_view(request):
967+    "Renders a short confirmation naming the file uploaded in a multipart POST"
968+    template = Template('File {{ file.filename }} received', name='POST Template')
969+    context = Context({'file': request.FILES['file_file']})
970+    return HttpResponse(template.render(context))
971+
972 def redirect_view(request):
973     "A view that redirects all requests to the GET view"
974     return HttpResponseRedirect('/test_client/get_view/')
975Index: tests/modeltests/test_client/models.py
976===================================================================
977--- tests/modeltests/test_client/models.py      (revision 5116)
978+++ tests/modeltests/test_client/models.py      (working copy)
979@@ -75,6 +75,21 @@
980         self.assertEqual(response.template.name, "Book template")
981         self.assertEqual(response.content, "Blink - Malcolm Gladwell")
982 
983+    def test_post_file_view(self):
984+        "POST this python file to a view, with and without FILE_UPLOAD_DIR set"
985+        import os, tempfile
986+        from django.conf import settings
987+        path = __file__.replace('.pyc', '.py')
988+        saved_dir = getattr(settings, 'FILE_UPLOAD_DIR', None)
989+        try:
990+            for upload_dir in [None, tempfile.gettempdir()]:
991+                settings.FILE_UPLOAD_DIR = upload_dir
992+                response = self.client.post('/test_client/post_file_view/', {'name': path, 'file': open(path)})
993+                self.failUnless('models.py' in response.context['file']['filename'])
994+                self.failUnless(len(response.context['file']['content']) == os.path.getsize(path))
995+                self.failUnless(not upload_dir or response.context['file']['tmpfilename'])
996+        finally:
997+            settings.FILE_UPLOAD_DIR = saved_dir # don't leak the setting into other tests
998     def test_redirect(self):
999         "GET a URL that redirects elsewhere"
1000         response = self.client.get('/test_client/redirect_view/')
1001Index: tests/modeltests/test_client/urls.py
1002===================================================================
1003--- tests/modeltests/test_client/urls.py        (revision 5116)
1004+++ tests/modeltests/test_client/urls.py        (working copy)
1005@@ -4,6 +4,7 @@
1006 urlpatterns = patterns('',
1007     (r'^get_view/$', views.get_view),
1008     (r'^post_view/$', views.post_view),
1009+    (r'^post_file_view/$', views.post_file_view),
1010     (r'^raw_post_view/$', views.raw_post_view),
1011     (r'^redirect_view/$', views.redirect_view),
1012     (r'^form_view/$', views.form_view),
1013Index: docs/request_response.txt
1014===================================================================
1015--- docs/request_response.txt   (revision 5116)
1016+++ docs/request_response.txt   (working copy)
1017@@ -72,13 +72,25 @@
1018 ``FILES``
1019     A dictionary-like object containing all uploaded files. Each key in
1020     ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
1021-    value in ``FILES`` is a standard Python dictionary with the following three
1022+    value in ``FILES`` is a standard Python dictionary with the following four
1023     keys:
1024 
1025         * ``filename`` -- The name of the uploaded file, as a Python string.
1026         * ``content-type`` -- The content type of the uploaded file.
1027         * ``content`` -- The raw content of the uploaded file.
1028+        * ``content-length`` -- The length of the content in bytes.
1029 
1030+    If streaming file uploads are enabled, two additional keys
1031+    describing the uploaded file will be present:
1032+
1033+        * ``tmpfilename`` -- The filename for the temporary file.
1034+        * ``tmpfile`` -- An open file object for the temporary file.
1035+
1036+    The temporary file will be removed when the request finishes.
1037+
1038+    Note that accessing ``content`` when streaming uploads are enabled
1039+    will read the whole file into memory, which may not be what you want.
1040+
1041     Note that ``FILES`` will only contain data if the request method was POST
1042     and the ``<form>`` that posted to the request had
1043     ``enctype="multipart/form-data"``. Otherwise, ``FILES`` will be a blank
1044Index: docs/settings.txt
1045===================================================================
1046--- docs/settings.txt   (revision 5116)
1047+++ docs/settings.txt   (working copy)
1048@@ -439,6 +439,15 @@
1049 
1050 .. _Testing Django Applications: ../testing/
1051 
1052+FILE_UPLOAD_DIR
1053+---------------
1054+
1055+Default: ``None``
1056+
1057+Path to a directory where temporary files should be written during
1058+file uploads. Leaving this as ``None`` will disable streaming file uploads,
1059+and cause all uploaded files to be stored (temporarily) in memory.
1060+
1061 IGNORABLE_404_ENDS
1062 ------------------
1063 
1064@@ -782,6 +791,16 @@
1065 
1066 .. _site framework docs: ../sites/
1067 
1068+STREAMING_MIN_POST_SIZE
1069+-----------------------
1070+
1071+Default: 524288 (``512*1024``)
1072+
1073+An integer specifying the minimum number of bytes that has to be
1074+received (in a POST) for file upload streaming to take place. Any
1075+request smaller than this will be handled in memory.
1076+Note: ``FILE_UPLOAD_DIR`` has to be defined to enable streaming.
1077+
1078 TEMPLATE_CONTEXT_PROCESSORS
1079 ---------------------------
1080 
1081Index: docs/forms.txt
1082===================================================================
1083--- docs/forms.txt      (revision 5116)
1084+++ docs/forms.txt      (working copy)
1085@@ -475,6 +475,19 @@
1086    new_data = request.POST.copy()
1087    new_data.update(request.FILES)
1088 
1089+Streaming file uploads
1090+----------------------
1091+
1092+File uploads will be read into memory by default. This works fine for
1093+small to medium-sized uploads (from 1MB to 100MB, depending on your
1094+setup and usage). If you want to support larger uploads, you can enable
1095+upload streaming, where only a small part of the file will be in memory
1096+at any time. To do this, you need to specify the ``FILE_UPLOAD_DIR``
1097+setting (see the settings_ document for more details).
1098+
1099+See `request object`_ for more details about ``request.FILES`` objects
1100+with streaming file uploads enabled.
1101+
1102 Validators
1103 ==========
1104 
1105@@ -697,3 +710,4 @@
1106 .. _`generic views`: ../generic_views/
1107 .. _`models API`: ../model-api/
1108 .. _settings: ../settings/
1109+.. _request object: ../request_response/#httprequest-objects