Code

Ticket #2070: 5100_file_upload_core_with_middleware_hooks_2.diff

File 5100_file_upload_core_with_middleware_hooks_2.diff, 37.7 KB (added by Michael Axiak <axiak@…>, 7 years ago)

Added middleware hooks...this is better.

Line 
1Index: django/http/file_descriptor.py
2===================================================================
3--- django/http/file_descriptor.py      (revision 0)
4+++ django/http/file_descriptor.py      (revision 0)
5@@ -0,0 +1,76 @@
6+"""
7+This file contains a fallback FileProgressDescriptor
8+for file upload progress.
9+"""
10+import pickle
11+import os
12+
13+class DefaultFileProgressDescriptor(object):
14+
15+    def __init__(self, FileException):
16+        self.FileException = FileException
17+   
18+    def __get__(self, request, HttpRequest):
19+        """
20+        Returns the file progress for this request.
21+        If no file progress is known, returns an empty
22+        dictionary.
23+        The request also keeps a local copy so that
24+        the file is not accessed every time one wants to
25+        ask for something.
26+        """
27+        from django.conf import settings
28+
29+        file_upload_dir = settings.FILE_UPLOAD_DIR
30+        progress_id     = request.META['UPLOAD_PROGRESS_ID']
31+       
32+        if not progress_id or not file_upload_dir:
33+            return {'wow': 'true'}
34+            return {}
35+
36+        if getattr(self, '_file_progress', False) != False:
37+            return self._file_progress
38+
39+        try:
40+            f = open(os.path.join(file_upload_dir, progress_id), 'rb')
41+            progress = pickle.load(f)
42+            f.close()
43+            self._file_progress = progress
44+            return progress
45+        except:
46+            self._file_progress = {}
47+            return {}
48+
49+    def __set__(self, request, new_progress):
50+        """
51+        Sets the value of the file progress for this request.
52+        If no file progress is underway, raises an error.
53+        """
54+
55+        from django.conf import settings
56+
57+        file_upload_dir = settings.FILE_UPLOAD_DIR
58+        progress_id     = request.META['UPLOAD_PROGRESS_ID']
59+
60+        if not progress_id or not file_upload_dir:
61+            raise self.FileException('There is no upload in progress.')
62+
63+        self._file_progress = new_progress
64+        f = open(os.path.join(file_upload_dir, progress_id), 'wb')
65+        pickle.dump(new_progress, f)
66+        f.close()
67+
68+    def __delete__(self, request):
69+        """
70+        Removes the file if there is an upload in progress.
71+        """
72+        file_upload_dir = settings.FILE_UPLOAD_DIR
73+        progress_id     = request.META['UPLOAD_PROGRESS_ID']
74+
75+        if not progress_id or not file_upload_dir:
76+            raise self.FileException('There is no upload in progress.')
77+
78+        try:
79+            os.remove(os.path.join(file_upload_dir, progress_id))
80+        except:
81+            pass
82Index: django/http/__init__.py
83===================================================================
84--- django/http/__init__.py     (revision 5100)
85+++ django/http/__init__.py     (working copy)
86@@ -1,22 +1,48 @@
87-import os
88+import os, pickle
89 from Cookie import SimpleCookie
90 from pprint import pformat
91 from urllib import urlencode, quote
92 from django.utils.datastructures import MultiValueDict
93+from django.http.file_descriptor import DefaultFileProgressDescriptor
94+import re
95 
96+try:
97+    from cStringIO import StringIO
98+except ImportError:
99+    from StringIO import StringIO
100+
101 RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
102 
103+
104 try:
105     # The mod_python version is more efficient, so try importing it first.
106     from mod_python.util import parse_qsl
107 except ImportError:
108     from cgi import parse_qsl
109 
110+class MetaFileProgressDescriptor(object):
111+    """
112+    This descriptor allows other descriptors to
113+    be attached at runtime to a request instance.
114+    """
115+    def __get__(self, request, *args, **kwargs):
116+        return request._file_progress.__get__(request, *args, **kwargs)
117+
118+    def __set__(self, request, *args, **kwargs):
119+        return request._file_progress.__set__(request, *args, **kwargs)
120+
121+    def __delete__(self, request, *args, **kwargs):
122+        return request._file_progress.__delete__(request, *args, **kwargs)
123+
124 class Http404(Exception):
125     pass
126 
127 class HttpRequest(object):
128     "A basic HTTP request"
129+
130+    upload_id_re = re.compile(r'^[a-fA-F0-9]{32}$')
131+    file_progress = MetaFileProgressDescriptor()
132+
133     def __init__(self):
134         self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {}
135         self.path = ''
136@@ -41,38 +67,332 @@
137 
138     def is_secure(self):
139         return os.environ.get("HTTPS") == "on"
140+
141+    def _get_file_progress_from_args(self, headers, get, querystring):
142 
143-def parse_file_upload(header_dict, post_data):
144-    "Returns a tuple of (POST MultiValueDict, FILES MultiValueDict)"
145-    import email, email.Message
146-    from cgi import parse_header
147-    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
148-    raw_message += '\r\n\r\n' + post_data
149-    msg = email.message_from_string(raw_message)
150-    POST = MultiValueDict()
151-    FILES = MultiValueDict()
152-    for submessage in msg.get_payload():
153-        if submessage and isinstance(submessage, email.Message.Message):
154-            name_dict = parse_header(submessage['Content-Disposition'])[1]
155-            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
156-            # or {'name': 'blah'} for POST fields
157-            # We assume all uploaded files have a 'filename' set.
158-            if 'filename' in name_dict:
159-                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
160-                if not name_dict['filename'].strip():
161-                    continue
162-                # IE submits the full path, so trim everything but the basename.
163-                # (We can't use os.path.basename because it expects Linux paths.)
164-                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
165-                FILES.appendlist(name_dict['name'], {
166-                    'filename': filename,
167-                    'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
168-                    'content': submessage.get_payload(),
169-                })
170-            else:
171-                POST.appendlist(name_dict['name'], submessage.get_payload())
172-    return POST, FILES
173+        if 'X-Upload-ID' in headers:
174+            progress_id = headers['X-Upload-ID']
175+        elif 'X-Progress-ID' in headers:
176+            progress_id = headers['X-Progress-ID']
177+        elif 'upload_id' in get:
178+            progress_id = get['upload_id']
179+        elif 'progress_id' in get:
180+            progress_id = get['progress_id']
181+        elif querystring != None and len(querystring.strip()) == 32:
182+            progress_id = querystring
183+        else:
184+            return None
185 
186+        if not self.upload_id_re.match(progress_id):
187+            return None
188+
189+        return progress_id
190+
191+
192+def parse_file_upload(headers, input, request):
193+    from django.conf import settings
194+
195+    # Only stream files to disk if FILE_UPLOAD_DIR is set
196+    file_upload_dir = settings.FILE_UPLOAD_DIR
197+    streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE
198+
199+    try:
200+        parser = MultiPartParser(headers, input, request, file_upload_dir, streaming_min_post_size)
201+        return parser.parse()
202+    except MultiPartParserError, e:
203+        return MultiValueDict({ '_file_upload_error': [e.message] }), {}
204+
205+class MultiPartParserError(Exception):
206+    def __init__(self, message):
207+        self.message = message
208+    def __str__(self):
209+        return repr(self.message)
210+       
211+class MultiPartParser(object):
212+    """
213+    A rfc2388 multipart/form-data parser.
214+   
215+    parse() reads the input stream in chunk_size chunks and returns a
216+    tuple of (POST MultiValueDict, FILES MultiValueDict). If
217+    file_upload_dir is defined files will be streamed to temporary
218+    files in the specified directory.
219+
220+    The FILES dictionary will have 'filename', 'content-type',
221+    'content' and 'content-length' entries. For streamed files it will
222+    also have 'tmpfilename' and 'tmpfile'. The 'content' entry will
223+    only be read from disk when referenced for streamed files.
224+
225+    If the request has an upload progress ID (a 32 character hex string,
226+    e.g. from the X-Progress-ID header) and ``file_upload_dir`` is set,
227+    progress is reported through ``request.file_progress`` as a
228+    { 'received', 'size', 'state' } dictionary while files are received.
229+    It is set to { 'state': 'done' } when the parser finishes.
230+
231+    """
232+
233+    def __init__(self, headers, input, request, file_upload_dir=None, streaming_min_post_size=None, chunk_size=1024*64):
234+        try:
235+            content_length = int(headers['Content-Length'])
236+        except:
237+            raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length'))
238+
239+        content_type = headers.get('Content-Type')
240+
241+        if not content_type or not content_type.startswith('multipart/'):
242+            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
243+           
244+        ctype, opts = self.parse_header(content_type)
245+        boundary = opts.get('boundary')
246+        from cgi import valid_boundary
247+        if not boundary or not valid_boundary(boundary):
248+            raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary)
249+
250+        progress_id = request.META['UPLOAD_PROGRESS_ID']
251+
252+        if file_upload_dir and progress_id:
253+            self._progress_filename = os.path.join(file_upload_dir, progress_id)
254+        else:
255+            self._progress_filename = None
256+        self._boundary = '--' + boundary
257+        self._input = input
258+        self._size = content_length
259+        self._received = 0
260+        self._file_upload_dir = file_upload_dir
261+        self._chunk_size = chunk_size
262+        self._state = 'PREAMBLE'
263+        self._partial = ''
264+        self._post = MultiValueDict()
265+        self._files = MultiValueDict()
266+        self._request = request
267+       
268+        if streaming_min_post_size is not None and content_length < streaming_min_post_size:
269+            self._file_upload_dir = None # disable file streaming for small request
270+
271+        try:
272+            # use mx fast string search if available
273+            from mx.TextTools import FS
274+            self._fs = FS(self._boundary)
275+        except ImportError:
276+            self._fs = None
277+
278+    def parse(self):
279+        try:
280+            self._parse()
281+        finally:
282+            if self._progress_filename:
283+                self._request.file_progress = {'state': 'done'}
284+               
285+       
286+        return self._post, self._files
287+
288+    def _parse(self):
289+        size = self._size
290+
291+        try:
292+            while size > 0:
293+                n = self._read(self._input, min(self._chunk_size, size))
294+                if not n:
295+                    break
296+                size -= n
297+        except:
298+            # consume any remaining data so we don't generate a "Connection Reset" error
299+            size = self._size - self._received
300+            while size > 0:
301+                data = self._input.read(min(self._chunk_size, size))
302+                size -= len(data)
303+            raise
304+
305+    def _find_boundary(self, data, start, stop):
306+        """
307+        Find the next boundary and return the end of current part
308+        and start of next part.
309+        """
310+        if self._fs:
311+            boundary = self._fs.find(data, start, stop)
312+        else:
313+            boundary = data.find(self._boundary, start, stop)
314+        if boundary >= 0:
315+            end = boundary
316+            next = boundary + len(self._boundary)
317+
318+            # backup over CRLF
319+            if end > 0 and data[end-1] == '\n': end -= 1
320+            if end > 0 and data[end-1] == '\r': end -= 1
321+            # skip over --CRLF
322+            if next < stop and data[next] == '-': next += 1
323+            if next < stop and data[next] == '-': next += 1
324+            if next < stop and data[next] == '\r': next += 1
325+            if next < stop and data[next] == '\n': next += 1
326+
327+            return True, end, next
328+        else:
329+            return False, stop, stop
330+
331+    class TemporaryFile(object):
332+        "A temporary file that tries to delete itself when garbage collected."
333+        def __init__(self, dir):
334+            import tempfile
335+            (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
336+            self.file = os.fdopen(fd, 'w+b')
337+            self.name = name
338+
339+        def __getattr__(self, name):
340+            a = getattr(self.__dict__['file'], name)
341+            if type(a) != type(0):
342+                setattr(self, name, a)
343+            return a
344+
345+        def __del__(self):
346+            try:
347+                os.unlink(self.name)
348+            except OSError:
349+                pass
350+           
351+    class LazyContent(dict):
352+        """
353+        A lazy FILES dictionary entry that reads the contents from
354+        tmpfile only when referenced.
355+        """
356+        def __init__(self, data):
357+            dict.__init__(self, data)
358+       
359+        def __getitem__(self, key):
360+            if key == 'content' and not self.has_key(key):
361+                self['tmpfile'].seek(0)
362+                self['content'] = self['tmpfile'].read()
363+            return dict.__getitem__(self, key)
364+
365+    def _read(self, input, size):
366+        data = input.read(size)
367+
368+        if not data:
369+            return 0
370+
371+        read_size = len(data)
372+        self._received += read_size
373+
374+        if self._partial:
375+            data = self._partial + data
376+
377+        start = 0
378+        stop = len(data)
379+       
380+        while start < stop:
381+            boundary, end, next = self._find_boundary(data, start, stop)
382+
383+            if not boundary and read_size:
384+                # make sure we don't treat a partial boundary (and its separators) as data
385+                stop -= len(self._boundary) + 16
386+                end = next = stop
387+                if end <= start:
388+                    break # need more data
389+
390+            if self._state == 'PREAMBLE':
391+                # Preamble, just ignore it
392+                self._state = 'HEADER'
393+
394+            elif self._state == 'HEADER':
395+                # Beginning of header, look for end of header and parse it if found.
396+
397+                header_end = data.find('\r\n\r\n', start, stop)
398+                if header_end == -1:
399+                    break # need more data
400+
401+                header = data[start:header_end]
402+
403+                self._fieldname = None
404+                self._filename = None
405+                self._content_type = None
406+
407+                for line in header.split('\r\n'):
408+                    ctype, opts = self.parse_header(line)
409+                    if ctype == 'content-disposition: form-data':
410+                        self._fieldname = opts.get('name')
411+                        self._filename = opts.get('filename')
412+                    elif ctype.startswith('content-type: '):
413+                        self._content_type = ctype[14:]
414+
415+                if self._filename is not None:
416+                    # cleanup filename from IE full paths:
417+                    self._filename = self._filename[self._filename.rfind("\\")+1:].strip()
418+
419+                    if self._filename: # ignore files without filenames
420+                        if self._file_upload_dir:
421+                            try:
422+                                self._file = self.TemporaryFile(dir=self._file_upload_dir)
423+                            except:
424+                                raise MultiPartParserError("Failed to create temporary file.")
425+                        else:
426+                            self._file = StringIO()
427+                    else:
428+                        self._file = None
429+                    self._filesize = 0
430+                    self._state = 'FILE'
431+                else:
432+                    self._field = StringIO()
433+                    self._state = 'FIELD'
434+                next = header_end + 4
435+
436+            elif self._state == 'FIELD':
437+                # In a field, collect data until a boundary is found.
438+
439+                self._field.write(data[start:end])
440+                if boundary:
441+                    if self._fieldname:
442+                        self._post.appendlist(self._fieldname, self._field.getvalue())
443+                    self._field.close()
444+                    self._state = 'HEADER'
445+
446+            elif self._state == 'FILE':
447+                # In a file, collect data until a boundary is found.
448+
449+                if self._file:
450+                    try:
451+                        self._file.write(data[start:end])
452+                    except IOError, e:
453+                        raise MultiPartParserError("Failed to write to temporary file.")
454+                    self._filesize += end-start
455+
456+                    if self._progress_filename:
457+                        self._request.file_progress = {'received': self._received,
458+                                                       'size':     self._size,
459+                                                       'state':    'uploading'}
460+
461+                if boundary:
462+                    if self._file:
463+                        if self._file_upload_dir:
464+                            self._file.seek(0)
465+                            file = self.LazyContent({
466+                                'filename': self._filename,
467+                                'content-type':  self._content_type,
468+                                # 'content': is read on demand
469+                                'content-length': self._filesize,
470+                                'tmpfilename': self._file.name,
471+                                'tmpfile': self._file
472+                            })
473+                        else:
474+                            file = {
475+                                'filename': self._filename,
476+                                'content-type':  self._content_type,
477+                                'content': self._file.getvalue(),
478+                                'content-length': self._filesize
479+                            }
480+                            self._file.close()
481+
482+                        self._files.appendlist(self._fieldname, file)
483+
484+                    self._state = 'HEADER'
485+
486+            start = next
487+               
488+        self._partial = data[start:]
489+
490+        return read_size
491+
492+    def parse_header(self, line):
493+        from cgi import parse_header
494+        return parse_header(line)
495+
496 class QueryDict(MultiValueDict):
497     """A specialized MultiValueDict that takes a query string when initialized.
498     This is immutable unless you create a copy of it."""
499@@ -306,3 +626,4 @@
500     if not host:
501         host = request.META.get('HTTP_HOST', '')
502     return host
503+
504Index: django/oldforms/__init__.py
505===================================================================
506--- django/oldforms/__init__.py (revision 5100)
507+++ django/oldforms/__init__.py (working copy)
508@@ -666,17 +666,22 @@
509         self.validator_list = [self.isNonEmptyFile] + validator_list
510 
511     def isNonEmptyFile(self, field_data, all_data):
512-        try:
513-            content = field_data['content']
514-        except TypeError:
515+        if field_data.has_key('_file_upload_error'):
516+            raise validators.CriticalValidationError, field_data['_file_upload_error']
517+        if not field_data.has_key('filename'):
518             raise validators.CriticalValidationError, gettext("No file was submitted. Check the encoding type on the form.")
519-        if not content:
520+        if not field_data['content-length']:
521             raise validators.CriticalValidationError, gettext("The submitted file is empty.")
522 
523     def render(self, data):
524         return '<input type="file" id="%s" class="v%s" name="%s" />' % \
525             (self.get_id(), self.__class__.__name__, self.field_name)
526 
527+    def prepare(self, new_data):
528+        if new_data.has_key('_file_upload_error'):
529+            # pretend we got something in the field to raise a validation error later
530+            new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] }
531+
532     def html2python(data):
533         if data is None:
534             raise EmptyValue
535Index: django/db/models/base.py
536===================================================================
537--- django/db/models/base.py    (revision 5100)
538+++ django/db/models/base.py    (working copy)
539@@ -12,12 +12,14 @@
540 from django.dispatch import dispatcher
541 from django.utils.datastructures import SortedDict
542 from django.utils.functional import curry
543+from django.utils.file import file_move_safe
544 from django.conf import settings
545 from itertools import izip
546 import types
547 import sys
548 import os
549 
550+               
551 class ModelBase(type):
552     "Metaclass for all models"
553     def __new__(cls, name, bases, attrs):
554@@ -361,7 +363,7 @@
555     def _get_FIELD_size(self, field):
556         return os.path.getsize(self._get_FIELD_filename(field))
557 
558-    def _save_FIELD_file(self, field, filename, raw_contents, save=True):
559+    def _save_FIELD_file(self, field, filename, raw_field, save=True):
560         directory = field.get_directory_name()
561         try: # Create the date-based directory if it doesn't exist.
562             os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
563@@ -383,9 +385,13 @@
564         setattr(self, field.attname, filename)
565 
566         full_filename = self._get_FIELD_filename(field)
567-        fp = open(full_filename, 'wb')
568-        fp.write(raw_contents)
569-        fp.close()
570+        if raw_field.has_key('tmpfilename'):
571+            raw_field['tmpfile'].close()
572+            file_move_safe(raw_field['tmpfilename'], full_filename)
573+        else:
574+            fp = open(full_filename, 'wb')
575+            fp.write(raw_field['content'])
576+            fp.close()
577 
578         # Save the width and/or height, if applicable.
579         if isinstance(field, ImageField) and (field.width_field or field.height_field):
580Index: django/db/models/fields/__init__.py
581===================================================================
582--- django/db/models/fields/__init__.py (revision 5100)
583+++ django/db/models/fields/__init__.py (working copy)
584@@ -636,7 +636,7 @@
585         setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
586         setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
587         setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
588-        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
589+        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
590         dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
591 
592     def delete_file(self, instance):
593@@ -659,9 +659,9 @@
594         if new_data.get(upload_field_name, False):
595             func = getattr(new_object, 'save_%s_file' % self.name)
596             if rel:
597-                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
598+                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save)
599             else:
600-                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
601+                func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save)
602 
603     def get_directory_name(self):
604         return os.path.normpath(datetime.datetime.now().strftime(self.upload_to))
605Index: django/conf/global_settings.py
606===================================================================
607--- django/conf/global_settings.py      (revision 5100)
608+++ django/conf/global_settings.py      (working copy)
609@@ -240,6 +240,20 @@
610 # isExistingURL validator.
611 URL_VALIDATOR_USER_AGENT = "Django/0.96pre (http://www.djangoproject.com)"
612 
613+# The directory to place streamed file uploads. The web server needs write
614+# permissions on this directory.
615+# If this is None, streaming uploads are disabled.
616+FILE_UPLOAD_DIR = None
617+
618+
619+# The minimum size of a POST before file uploads are streamed to disk.
620+# POSTs smaller than this number are uploaded to memory instead.
621+# Size is in bytes.
622+STREAMING_MIN_POST_SIZE = 512 * (2**10)
623+
624+
625+
626+
627 ##############
628 # MIDDLEWARE #
629 ##############
630@@ -335,3 +349,5 @@
631 
632 # The list of directories to search for fixtures
633 FIXTURE_DIRS = ()
634+
635+
636Index: django/core/handlers/wsgi.py
637===================================================================
638--- django/core/handlers/wsgi.py        (revision 5100)
639+++ django/core/handlers/wsgi.py        (working copy)
640@@ -111,7 +111,14 @@
641             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
642                 header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
643                 header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
644-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
645+                header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '')
646+                header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '')
647+                try:
648+                    self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'], self)
649+                except:
650+                    self._post, self._files = {}, {} # make sure we dont read the input stream again
651+                    raise
652+                self._raw_post_data = None # raw data is not available for streamed multipart messages
653             else:
654                 self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
655         else:
656@@ -167,6 +174,17 @@
657             buf.close()
658             return self._raw_post_data
659 
660+    def _get_file_progress_id(self):
661+        """
662+        Returns the Progress ID of the request,
663+        usually provided if there is a file upload
664+        going on.
665+        Returns ``None`` if no progress ID is specified.
666+        """
667+        return self._get_file_progress_from_args(self.environ,
668+                                                 self.GET,
669+                                                 self._req.args)
670+
671     GET = property(_get_get, _set_get)
672     POST = property(_get_post, _set_post)
673     COOKIES = property(_get_cookies, _set_cookies)
674Index: django/core/handlers/base.py
675===================================================================
676--- django/core/handlers/base.py        (revision 5100)
677+++ django/core/handlers/base.py        (working copy)
678@@ -5,7 +5,7 @@
679 
680 class BaseHandler(object):
681     def __init__(self):
682-        self._request_middleware = self._view_middleware = self._response_middleware = self._exception_middleware = None
683+        self._upload_middleware = self._request_middleware = self._view_middleware = self._response_middleware = self._exception_middleware = None
684 
685     def load_middleware(self):
686         """
687@@ -19,6 +19,7 @@
688         self._view_middleware = []
689         self._response_middleware = []
690         self._exception_middleware = []
691+        self._upload_middleware = []
692         for middleware_path in settings.MIDDLEWARE_CLASSES:
693             try:
694                 dot = middleware_path.rindex('.')
695@@ -47,13 +48,29 @@
696                 self._response_middleware.insert(0, mw_instance.process_response)
697             if hasattr(mw_instance, 'process_exception'):
698                 self._exception_middleware.insert(0, mw_instance.process_exception)
699+            if hasattr(mw_instance, 'process_upload'):
700+                self._upload_middleware.append(0, mw_instance.process_upload)
701 
702+    def file_progress_descriptor(self, request):
703+        """
704+        Returns a descriptor that manages the file_progress
705+        """       
706+        for mw_call in self._upload_middleware:
707+            result = mw_call(http.MultiPartParserError)
708+            if result != None:
709+                return result
710+
711+        return http.DefaultFileProgressDescriptor(http.MultiPartParserError)
712+       
713     def get_response(self, request):
714         "Returns an HttpResponse object for the given HttpRequest"
715         from django.core import exceptions, urlresolvers
716         from django.core.mail import mail_admins
717         from django.conf import settings
718 
719+        # Add file_progress descriptor
720+        request._file_progress = self.file_progress_descriptor(request)
721+
722         # Apply request middleware
723         for middleware_method in self._request_middleware:
724             response = middleware_method(request)
725Index: django/core/handlers/modpython.py
726===================================================================
727--- django/core/handlers/modpython.py   (revision 5100)
728+++ django/core/handlers/modpython.py   (working copy)
729@@ -47,7 +47,12 @@
730     def _load_post_and_files(self):
731         "Populates self._post and self._files"
732         if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
733-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
734+            self._raw_post_data = None # raw data is not available for streamed multipart messages
735+            try:
736+                self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req, self)
737+            except:
738+                self._post, self._files = {}, {} # make sure we dont read the input stream again
739+                raise
740         else:
741             self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
742 
743@@ -92,20 +97,21 @@
744                 'AUTH_TYPE':         self._req.ap_auth_type,
745                 'CONTENT_LENGTH':    self._req.clength, # This may be wrong
746                 'CONTENT_TYPE':      self._req.content_type, # This may be wrong
747-                'GATEWAY_INTERFACE': 'CGI/1.1',
748-                'PATH_INFO':         self._req.path_info,
749-                'PATH_TRANSLATED':   None, # Not supported
750-                'QUERY_STRING':      self._req.args,
751-                'REMOTE_ADDR':       self._req.connection.remote_ip,
752-                'REMOTE_HOST':       None, # DNS lookups not supported
753-                'REMOTE_IDENT':      self._req.connection.remote_logname,
754-                'REMOTE_USER':       self._req.user,
755-                'REQUEST_METHOD':    self._req.method,
756-                'SCRIPT_NAME':       None, # Not supported
757-                'SERVER_NAME':       self._req.server.server_hostname,
758-                'SERVER_PORT':       self._req.server.port,
759-                'SERVER_PROTOCOL':   self._req.protocol,
760-                'SERVER_SOFTWARE':   'mod_python'
761+                'GATEWAY_INTERFACE':  'CGI/1.1',
762+                'PATH_INFO':          self._req.path_info,
763+                'PATH_TRANSLATED':    None, # Not supported
764+                'QUERY_STRING':       self._req.args,
765+                'REMOTE_ADDR':        self._req.connection.remote_ip,
766+                'REMOTE_HOST':        None, # DNS lookups not supported
767+                'REMOTE_IDENT':       self._req.connection.remote_logname,
768+                'REMOTE_USER':        self._req.user,
769+                'REQUEST_METHOD':     self._req.method,
770+                'SCRIPT_NAME':        None, # Not supported
771+                'SERVER_NAME':        self._req.server.server_hostname,
772+                'SERVER_PORT':        self._req.server.port,
773+                'SERVER_PROTOCOL':    self._req.protocol,
774+                'UPLOAD_PROGRESS_ID': self._get_file_progress_id(),
775+                'SERVER_SOFTWARE':    'mod_python'
776             }
777             for key, value in self._req.headers_in.items():
778                 key = 'HTTP_' + key.upper().replace('-', '_')
779@@ -122,6 +128,18 @@
780     def _get_method(self):
781         return self.META['REQUEST_METHOD'].upper()
782 
783+    def _get_file_progress_id(self):
784+        """
785+        Returns the Progress ID of the request,
786+        usually provided if there is a file upload
787+        going on.
788+        Returns ``None`` if no progress ID is specified.
789+        """
790+        return self._get_file_progress_from_args(self._req.headers_in,
791+                                                 self.GET,
792+                                                 self._req.args)
793+
794+
795     GET = property(_get_get, _set_get)
796     POST = property(_get_post, _set_post)
797     COOKIES = property(_get_cookies, _set_cookies)
798Index: tests/modeltests/test_client/views.py
799===================================================================
800--- tests/modeltests/test_client/views.py       (revision 5100)
801+++ tests/modeltests/test_client/views.py       (working copy)
802@@ -44,6 +44,12 @@
803 
804     return HttpResponse(t.render(c))
805 
806+def post_file_view(request):
807+    "A view that expects a multipart post and returns a file in the context"
808+    t = Template('File {{ file.filename }} received', name='POST Template')
809+    c = Context({'file': request.FILES['file_file']})
810+    return HttpResponse(t.render(c))
811+
812 def redirect_view(request):
813     "A view that redirects all requests to the GET view"
814     return HttpResponseRedirect('/test_client/get_view/')
815Index: tests/modeltests/test_client/models.py
816===================================================================
817--- tests/modeltests/test_client/models.py      (revision 5100)
818+++ tests/modeltests/test_client/models.py      (working copy)
819@@ -75,6 +75,21 @@
820         self.assertEqual(response.template.name, "Book template")
821         self.assertEqual(response.content, "Blink - Malcolm Gladwell")
822 
823+    def test_post_file_view(self):
824+        "POST this python file to a view"
825+        import os, tempfile
826+        from django.conf import settings
827+        file = __file__.replace('.pyc', '.py')
828+        for upload_dir in [None, tempfile.gettempdir()]:
829+            settings.FILE_UPLOAD_DIR = upload_dir
830+            post_data = { 'name': file, 'file': open(file) }
831+            response = self.client.post('/test_client/post_file_view/', post_data)
832+            self.failUnless('models.py' in response.context['file']['filename'])
833+            self.failUnless(len(response.context['file']['content']) == os.path.getsize(file))
834+            if upload_dir:
835+                self.failUnless(response.context['file']['tmpfilename'])
836+
837+
838     def test_redirect(self):
839         "GET a URL that redirects elsewhere"
840         response = self.client.get('/test_client/redirect_view/')
841Index: tests/modeltests/test_client/urls.py
842===================================================================
843--- tests/modeltests/test_client/urls.py        (revision 5100)
844+++ tests/modeltests/test_client/urls.py        (working copy)
845@@ -4,6 +4,7 @@
846 urlpatterns = patterns('',
847     (r'^get_view/$', views.get_view),
848     (r'^post_view/$', views.post_view),
849+    (r'^post_file_view/$', views.post_file_view),
850     (r'^raw_post_view/$', views.raw_post_view),
851     (r'^redirect_view/$', views.redirect_view),
852     (r'^form_view/$', views.form_view),
853Index: docs/request_response.txt
854===================================================================
855--- docs/request_response.txt   (revision 5100)
856+++ docs/request_response.txt   (working copy)
857@@ -72,13 +72,25 @@
858 ``FILES``
859     A dictionary-like object containing all uploaded files. Each key in
860     ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
861-    value in ``FILES`` is a standard Python dictionary with the following three
862+    value in ``FILES`` is a standard Python dictionary with the following four
863     keys:
864 
865         * ``filename`` -- The name of the uploaded file, as a Python string.
866         * ``content-type`` -- The content type of the uploaded file.
867         * ``content`` -- The raw content of the uploaded file.
868+        * ``content-length`` -- The length of the content in bytes.
869 
870+    If streaming file uploads are enabled, two additional keys
871+    describing the uploaded file will be present:
872+
873+       * ``tmpfilename`` -- The filename for the temporary file.
874+       * ``tmpfile`` -- An open file object for the temporary file.
875+
876+    The temporary file will be removed when the request finishes.
877+
878+    Note that accessing ``content`` when streaming uploads are enabled
879+    will read the whole file into memory, which may not be what you want.
880+
881     Note that ``FILES`` will only contain data if the request method was POST
882     and the ``<form>`` that posted to the request had
883     ``enctype="multipart/form-data"``. Otherwise, ``FILES`` will be a blank
884Index: docs/settings.txt
885===================================================================
886--- docs/settings.txt   (revision 5100)
887+++ docs/settings.txt   (working copy)
888@@ -437,6 +437,15 @@
889 
890 .. _Testing Django Applications: ../testing/
891 
892+FILE_UPLOAD_DIR
893+---------------
894+
895+Default: ``None``
896+
897+Path to a directory where temporary files should be written during
898+file uploads. Leaving this as ``None`` will disable streaming file uploads,
899+and cause all uploaded files to be stored (temporarily) in memory.
900+
901 IGNORABLE_404_ENDS
902 ------------------
903 
904@@ -780,6 +789,16 @@
905 
906 .. _site framework docs: ../sites/
907 
908+STREAMING_MIN_POST_SIZE
909+-----------------------
910+
911+Default: 524288 (``512*1024``)
912+
913+An integer specifying the minimum number of bytes that must be
914+received (in a POST) for file upload streaming to take place. Any
915+request smaller than this will be handled in memory.
916+Note: ``FILE_UPLOAD_DIR`` must be set (not ``None``) to enable streaming.
917+
918 TEMPLATE_CONTEXT_PROCESSORS
919 ---------------------------
920 
921Index: docs/forms.txt
922===================================================================
923--- docs/forms.txt      (revision 5100)
924+++ docs/forms.txt      (working copy)
925@@ -475,6 +475,19 @@
926    new_data = request.POST.copy()
927    new_data.update(request.FILES)
928 
929+Streaming file uploads
930+----------------------
931+
932+File uploads will be read into memory by default. This works fine for
933+small to medium-sized uploads (from 1MB to 100MB, depending on your
934+setup and usage). If you want to support larger uploads, you can enable
935+upload streaming, where only a small part of the file will be in memory
936+at any time. To do this, you need to specify the ``FILE_UPLOAD_DIR``
937+setting (see the settings_ document for more details).
938+
939+See `request object`_ for more details about ``request.FILES`` objects
940+with streaming file uploads enabled.
941+
942 Validators
943 ==========
944 
945@@ -693,3 +706,4 @@
946 .. _`generic views`: ../generic_views/
947 .. _`models API`: ../model-api/
948 .. _settings: ../settings/
949+.. _request object: ../request_response/#httprequest-objects