Code

Ticket #2070: 5126_file_uploads_latest_patch.diff

File 5126_file_uploads_latest_patch.diff, 39.4 KB (added by Michael Axiak <axiak@…>, 7 years ago)

Added 'state':'starting' to be more like mod_uploadprogress.

Line 
1Index: django/http/file_descriptor.py
2===================================================================
3--- django/http/file_descriptor.py      (revision 0)
4+++ django/http/file_descriptor.py      (revision 0)
5@@ -0,0 +1,75 @@
6+"""
7+This file contains a fallback FileProgressDescriptor
8+for file upload progress.
9+"""
10+import pickle
11+import os
12+
13+class DefaultFileProgressDescriptor(object):
14+
15+    def __init__(self, FileException):
16+        self.FileException = FileException
17+   
18+    def __get__(self, request, HttpRequest):
19+        """
20+        Returns the file progress for this request.
21+        If no file progress is known, returns an empty
22+        dictionary.
23+        The request also keeps a local copy so that
24+        the file is not accessed every time one wants to
25+        ask for something.
26+        """
27+        from django.conf import settings
28+
29+        file_upload_dir = settings.FILE_UPLOAD_DIR
30+        progress_id     = request.META['UPLOAD_PROGRESS_ID']
31+       
32+        if not progress_id or not file_upload_dir:
33+            return {}
34+
35+        if getattr(self, '_file_progress', False) != False:
36+            return self._file_progress
37+
38+        try:
39+            f = open(os.path.join(file_upload_dir, progress_id), 'rb')
40+            progress = pickle.load(f)
41+            f.close()
42+            self._file_progress = progress
43+            return progress
44+        except:
45+            self._file_progress = {}
46+            return {}
47+
48+    def __set__(self, request, new_progress):
49+        """
50+        Sets the value of the file progress for this request.
51+        If no file progress is underway, raises an error.
52+        """
53+
54+        from django.conf import settings
55+
56+        file_upload_dir = settings.FILE_UPLOAD_DIR
57+        progress_id     = request.META['UPLOAD_PROGRESS_ID']
58+
59+        if not progress_id or not file_upload_dir:
60+            raise self.FileException('There is no upload in progress.')
61+
62+        self._file_progress = new_progress
63+        f = open(os.path.join(file_upload_dir, progress_id), 'wb')
64+        pickle.dump(new_progress, f)
65+        f.close()
66+
67+    def __delete__(self, request):
68+        """
70+        Removes the progress file if there is an upload in progress.
70+        """
71+        file_upload_dir = settings.FILE_UPLOAD_DIR
72+        progress_id     = request.META['UPLOAD_PROGRESS_ID']
73+
74+        if not progress_id or not file_upload_dir:
75+            raise self.FileException('There is no upload in progress.')
76+
77+        try:
78+            os.remove(os.path.join(file_upload_dir, progress_id))
79+        except:
80+            pass
81Index: django/http/__init__.py
82===================================================================
83--- django/http/__init__.py     (revision 5126)
84+++ django/http/__init__.py     (working copy)
85@@ -1,22 +1,48 @@
86-import os
87+import os, pickle
88 from Cookie import SimpleCookie
89 from pprint import pformat
90 from urllib import urlencode, quote
91 from django.utils.datastructures import MultiValueDict
92+from django.http.file_descriptor import DefaultFileProgressDescriptor
93+import re
94 
95+try:
96+    from cStringIO import StringIO
97+except ImportError:
98+    from StringIO import StringIO
99+
100 RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
101 
102+
103 try:
104     # The mod_python version is more efficient, so try importing it first.
105     from mod_python.util import parse_qsl
106 except ImportError:
107     from cgi import parse_qsl
108 
109+class MetaFileProgressDescriptor(object):
110+    """
111+    This descriptor allows other descriptors to
112+    be loaded in runtime to a request instance.
113+    """
114+    def __get__(self, request, *args, **kwargs):
115+        return request._file_progress.__get__(request, *args, **kwargs)
116+
117+    def __set__(self, request, *args, **kwargs):
118+        return request._file_progress.__set__(request, *args, **kwargs)
119+
120+    def __delete__(self, request, *args, **kwargs):
121+        return request._file_progress.__delete__(request, *args, **kwargs)
122+
123 class Http404(Exception):
124     pass
125 
126 class HttpRequest(object):
127     "A basic HTTP request"
128+
129+    upload_id_re = re.compile(r'^[a-fA-F0-9]{32}$')
130+    file_progress = MetaFileProgressDescriptor()
131+
132     def __init__(self):
133         self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {}
134         self.path = ''
135@@ -41,38 +67,338 @@
136 
137     def is_secure(self):
138         return os.environ.get("HTTPS") == "on"
139+
140+    def _get_file_progress_from_args(self, headers, get, querystring):
141 
142-def parse_file_upload(header_dict, post_data):
143-    "Returns a tuple of (POST MultiValueDict, FILES MultiValueDict)"
144-    import email, email.Message
145-    from cgi import parse_header
146-    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
147-    raw_message += '\r\n\r\n' + post_data
148-    msg = email.message_from_string(raw_message)
149-    POST = MultiValueDict()
150-    FILES = MultiValueDict()
151-    for submessage in msg.get_payload():
152-        if submessage and isinstance(submessage, email.Message.Message):
153-            name_dict = parse_header(submessage['Content-Disposition'])[1]
154-            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
155-            # or {'name': 'blah'} for POST fields
156-            # We assume all uploaded files have a 'filename' set.
157-            if 'filename' in name_dict:
158-                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
159-                if not name_dict['filename'].strip():
160-                    continue
161-                # IE submits the full path, so trim everything but the basename.
162-                # (We can't use os.path.basename because it expects Linux paths.)
163-                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
164-                FILES.appendlist(name_dict['name'], {
165-                    'filename': filename,
166-                    'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
167-                    'content': submessage.get_payload(),
168-                })
169-            else:
170-                POST.appendlist(name_dict['name'], submessage.get_payload())
171-    return POST, FILES
172+        if 'X-Upload-ID' in headers:
173+            progress_id = headers['X-Upload-ID']
174+        elif 'X-Progress-ID' in headers:
175+            progress_id = headers['X-Progress-ID']
176+        elif 'HTTP_X_UPLOAD_ID' in headers:
177+            progress_id = headers['HTTP_X_UPLOAD_ID']
178+        elif 'HTTP_X_PROGRESS_ID' in headers:
179+            progress_id = headers['HTTP_X_PROGRESS_ID']
180+        elif 'upload_id' in get:
181+            progress_id = get['upload_id']
182+        elif 'progress_id' in get:
183+            progress_id = get['progress_id']
184+        elif querystring != None and len(querystring.strip()) == 32:
185+            progress_id = querystring
186+        else:
187+            return None
188 
189+        if not self.upload_id_re.match(progress_id):
190+            return None
191+
192+        return progress_id
193+
194+
195+def parse_file_upload(headers, input, request):
196+    from django.conf import settings
197+
198+    # Only stream files to disk if FILE_UPLOAD_DIR is set
199+    file_upload_dir = settings.FILE_UPLOAD_DIR
200+    streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE
201+
202+    try:
203+        parser = MultiPartParser(headers, input, request, file_upload_dir, streaming_min_post_size)
204+        return parser.parse()
205+    except MultiPartParserError, e:
206+        return MultiValueDict({ '_file_upload_error': [e.message] }), {}
207+
208+class MultiPartParserError(Exception):
209+    def __init__(self, message):
210+        self.message = message
211+    def __str__(self):
212+        return repr(self.message)
213+       
214+class MultiPartParser(object):
215+    """
216+    An RFC 2388 multipart/form-data parser.
217+   
218+    parse() reads the input stream in chunk_size chunks and returns a
219+    tuple of (POST MultiValueDict, FILES MultiValueDict). If
220+    file_upload_dir is defined files will be streamed to temporary
221+    files in the specified directory.
222+
223+    The FILES dictionary will have 'filename', 'content-type',
224+    'content' and 'content-length' entries. For streamed files it will
225+    also have 'tmpfilename' and 'tmpfile'. The 'content' entry will
226+    only be read from disk when referenced for streamed files.
227+
228+    If the header X-Progress-ID is sent with a 32 character hex string
229+    a temporary file with the same name will be created in
230+    ``file_upload_dir`` with a pickled { 'received', 'size' }
231+    dictionary with the number of bytes received and the size expected
232+    respectively. The file will be unlinked when the parser finishes.
233+
234+    """
235+
236+    def __init__(self, headers, input, request, file_upload_dir=None, streaming_min_post_size=None, chunk_size=1024*64):
237+        try:
238+            content_length = int(headers['Content-Length'])
239+        except:
240+            raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length'))
241+
242+        content_type = headers.get('Content-Type')
243+
244+        if not content_type or not content_type.startswith('multipart/'):
245+            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
246+           
247+        ctype, opts = self.parse_header(content_type)
248+        boundary = opts.get('boundary')
249+        from cgi import valid_boundary
250+        if not boundary or not valid_boundary(boundary):
251+            raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary)
252+
253+        progress_id = request.META['UPLOAD_PROGRESS_ID']
254+
255+        if file_upload_dir and progress_id:
256+            self._progress_filename = os.path.join(file_upload_dir, progress_id)
257+        else:
258+            self._progress_filename = None
259+        self._boundary = '--' + boundary
260+        self._input = input
261+        self._size = content_length
262+        self._received = 0
263+        self._file_upload_dir = file_upload_dir
264+        self._chunk_size = chunk_size
265+        self._state = 'PREAMBLE'
266+        self._partial = ''
267+        self._post = MultiValueDict()
268+        self._files = MultiValueDict()
269+        self._request = request
270+       
271+        if streaming_min_post_size is not None and content_length < streaming_min_post_size:
272+            self._file_upload_dir = None # disable file streaming for small request
273+        else:
274+            request.file_progress = {'state': 'starting'}
275+
276+        try:
277+            # use mx fast string search if available
278+            from mx.TextTools import FS
279+            self._fs = FS(self._boundary)
280+        except ImportError:
281+            self._fs = None
282+
283+    def parse(self):
284+        try:
285+            self._parse()
286+        finally:
287+            if self._progress_filename:
288+                self._request.file_progress = {'state': 'done'}
289+               
290+       
291+        return self._post, self._files
292+
293+    def _parse(self):
294+        size = self._size
295+
296+        try:
297+            while size > 0:
298+                n = self._read(self._input, min(self._chunk_size, size))
299+                if not n:
300+                    break
301+                size -= n
302+        except:
303+            # consume any remaining data so we don't generate a "Connection Reset" error
304+            size = self._size - self._received
305+            while size > 0:
306+                data = self._input.read(min(self._chunk_size, size))
307+                size -= len(data)
308+            raise
309+
310+    def _find_boundary(self, data, start, stop):
311+        """
312+        Find the next boundary and return the end of current part
313+        and start of next part.
314+        """
315+        if self._fs:
316+            boundary = self._fs.find(data, start, stop)
317+        else:
318+            boundary = data.find(self._boundary, start, stop)
319+        if boundary >= 0:
320+            end = boundary
321+            next = boundary + len(self._boundary)
322+
323+            # backup over CRLF
324+            if end > 0 and data[end-1] == '\n': end -= 1
325+            if end > 0 and data[end-1] == '\r': end -= 1
326+            # skip over --CRLF
327+            if next < stop and data[next] == '-': next += 1
328+            if next < stop and data[next] == '-': next += 1
329+            if next < stop and data[next] == '\r': next += 1
330+            if next < stop and data[next] == '\n': next += 1
331+
332+            return True, end, next
333+        else:
334+            return False, stop, stop
335+
336+    class TemporaryFile(object):
337+        "A temporary file that tries to delete itself when garbage collected."
338+        def __init__(self, dir):
339+            import tempfile
340+            (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
341+            self.file = os.fdopen(fd, 'w+b')
342+            self.name = name
343+
344+        def __getattr__(self, name):
345+            a = getattr(self.__dict__['file'], name)
346+            if type(a) != type(0):
347+                setattr(self, name, a)
348+            return a
349+
350+        def __del__(self):
351+            try:
352+                os.unlink(self.name)
353+            except OSError:
354+                pass
355+           
356+    class LazyContent(dict):
357+        """
358+        A lazy FILES dictionary entry that reads the contents from
359+        tmpfile only when referenced.
360+        """
361+        def __init__(self, data):
362+            dict.__init__(self, data)
363+       
364+        def __getitem__(self, key):
365+            if key == 'content' and not self.has_key(key):
366+                self['tmpfile'].seek(0)
367+                self['content'] = self['tmpfile'].read()
368+            return dict.__getitem__(self, key)
369+
370+    def _read(self, input, size):
371+        data = input.read(size)
372+
373+        if not data:
374+            return 0
375+
376+        read_size = len(data)
377+        self._received += read_size
378+
379+        if self._partial:
380+            data = self._partial + data
381+
382+        start = 0
383+        stop = len(data)
384+       
385+        while start < stop:
386+            boundary, end, next = self._find_boundary(data, start, stop)
387+
388+            if not boundary and read_size:
389+                # make sure we don't treat a partial boundary (and its separators) as data
390+                stop -= len(self._boundary) + 16
391+                end = next = stop
392+                if end <= start:
393+                    break # need more data
394+
395+            if self._state == 'PREAMBLE':
396+                # Preamble, just ignore it
397+                self._state = 'HEADER'
398+
399+            elif self._state == 'HEADER':
400+                # Beginning of header, look for end of header and parse it if found.
401+
402+                header_end = data.find('\r\n\r\n', start, stop)
403+                if header_end == -1:
404+                    break # need more data
405+
406+                header = data[start:header_end]
407+
408+                self._fieldname = None
409+                self._filename = None
410+                self._content_type = None
411+
412+                for line in header.split('\r\n'):
413+                    ctype, opts = self.parse_header(line)
414+                    if ctype == 'content-disposition: form-data':
415+                        self._fieldname = opts.get('name')
416+                        self._filename = opts.get('filename')
417+                    elif ctype.startswith('content-type: '):
418+                        self._content_type = ctype[14:]
419+
420+                if self._filename is not None:
421+                    # cleanup filename from IE full paths:
422+                    self._filename = self._filename[self._filename.rfind("\\")+1:].strip()
423+
424+                    if self._filename: # ignore files without filenames
425+                        if self._file_upload_dir:
426+                            try:
427+                                self._file = self.TemporaryFile(dir=self._file_upload_dir)
428+                            except:
429+                                raise MultiPartParserError("Failed to create temporary file.")
430+                        else:
431+                            self._file = StringIO()
432+                    else:
433+                        self._file = None
434+                    self._filesize = 0
435+                    self._state = 'FILE'
436+                else:
437+                    self._field = StringIO()
438+                    self._state = 'FIELD'
439+                next = header_end + 4
440+
441+            elif self._state == 'FIELD':
442+                # In a field, collect data until a boundary is found.
443+
444+                self._field.write(data[start:end])
445+                if boundary:
446+                    if self._fieldname:
447+                        self._post.appendlist(self._fieldname, self._field.getvalue())
448+                    self._field.close()
449+                    self._state = 'HEADER'
450+
451+            elif self._state == 'FILE':
452+                # In a file, collect data until a boundary is found.
453+
454+                if self._file:
455+                    try:
456+                        self._file.write(data[start:end])
457+                    except IOError, e:
458+                        raise MultiPartParserError("Failed to write to temporary file.")
459+                    self._filesize += end-start
460+
461+                    if self._progress_filename:
462+                        self._request.file_progress = {'received': self._received,
463+                                                       'size':     self._size,
464+                                                       'state':    'uploading'}
465+
466+                if boundary:
467+                    if self._file:
468+                        if self._file_upload_dir:
469+                            self._file.seek(0)
470+                            file = self.LazyContent({
471+                                'filename': self._filename,
472+                                'content-type':  self._content_type,
473+                                # 'content': is read on demand
474+                                'content-length': self._filesize,
475+                                'tmpfilename': self._file.name,
476+                                'tmpfile': self._file
477+                            })
478+                        else:
479+                            file = {
480+                                'filename': self._filename,
481+                                'content-type':  self._content_type,
482+                                'content': self._file.getvalue(),
483+                                'content-length': self._filesize
484+                            }
485+                            self._file.close()
486+
487+                        self._files.appendlist(self._fieldname, file)
488+
489+                    self._state = 'HEADER'
490+
491+            start = next
492+               
493+        self._partial = data[start:]
494+
495+        return read_size
496+
497+    def parse_header(self, line):
498+        from cgi import parse_header
499+        return parse_header(line)
500+
501 class QueryDict(MultiValueDict):
502     """A specialized MultiValueDict that takes a query string when initialized.
503     This is immutable unless you create a copy of it."""
504@@ -306,3 +632,4 @@
505     if not host:
506         host = request.META.get('HTTP_HOST', '')
507     return host
508+
509Index: django/conf/global_settings.py
510===================================================================
511--- django/conf/global_settings.py      (revision 5126)
512+++ django/conf/global_settings.py      (working copy)
513@@ -240,6 +240,20 @@
514 # isExistingURL validator.
515 URL_VALIDATOR_USER_AGENT = "Django/0.96pre (http://www.djangoproject.com)"
516 
517+# The directory to place streamed file uploads. The web server needs write
518+# permissions on this directory.
519+# If this is None, streaming uploads are disabled.
520+FILE_UPLOAD_DIR = None
521+
522+
523+# The minimum size of a POST before file uploads are streamed to disk.
524+# POSTs smaller than this are kept in memory instead of being streamed.
525+# Size is in bytes.
526+STREAMING_MIN_POST_SIZE = 512 * (2**10)
527+
528+
529+
530+
531 ##############
532 # MIDDLEWARE #
533 ##############
534@@ -335,3 +349,5 @@
535 
536 # The list of directories to search for fixtures
537 FIXTURE_DIRS = ()
538+
539+
540Index: django/db/models/base.py
541===================================================================
542--- django/db/models/base.py    (revision 5126)
543+++ django/db/models/base.py    (working copy)
544@@ -12,12 +12,14 @@
545 from django.dispatch import dispatcher
546 from django.utils.datastructures import SortedDict
547 from django.utils.functional import curry
548+from django.utils.file import file_move_safe
549 from django.conf import settings
550 from itertools import izip
551 import types
552 import sys
553 import os
554 
555+               
556 class ModelBase(type):
557     "Metaclass for all models"
558     def __new__(cls, name, bases, attrs):
559@@ -361,7 +363,7 @@
560     def _get_FIELD_size(self, field):
561         return os.path.getsize(self._get_FIELD_filename(field))
562 
563-    def _save_FIELD_file(self, field, filename, raw_contents, save=True):
564+    def _save_FIELD_file(self, field, filename, raw_field, save=True):
565         directory = field.get_directory_name()
566         try: # Create the date-based directory if it doesn't exist.
567             os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
568@@ -383,9 +385,13 @@
569         setattr(self, field.attname, filename)
570 
571         full_filename = self._get_FIELD_filename(field)
572-        fp = open(full_filename, 'wb')
573-        fp.write(raw_contents)
574-        fp.close()
575+        if raw_field.has_key('tmpfilename'):
576+            raw_field['tmpfile'].close()
577+            file_move_safe(raw_field['tmpfilename'], full_filename)
578+        else:
579+            fp = open(full_filename, 'wb')
580+            fp.write(raw_field['content'])
581+            fp.close()
582 
583         # Save the width and/or height, if applicable.
584         if isinstance(field, ImageField) and (field.width_field or field.height_field):
585Index: django/db/models/fields/__init__.py
586===================================================================
587--- django/db/models/fields/__init__.py (revision 5126)
588+++ django/db/models/fields/__init__.py (working copy)
589@@ -638,7 +638,7 @@
590         setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
591         setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
592         setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
593-        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
594+        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
595         dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
596 
597     def delete_file(self, instance):
598@@ -661,9 +661,9 @@
599         if new_data.get(upload_field_name, False):
600             func = getattr(new_object, 'save_%s_file' % self.name)
601             if rel:
602-                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
603+                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save)
604             else:
605-                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
606+                func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save)
607 
608     def get_directory_name(self):
609         return os.path.normpath(datetime.datetime.now().strftime(self.upload_to))
610Index: django/oldforms/__init__.py
611===================================================================
612--- django/oldforms/__init__.py (revision 5126)
613+++ django/oldforms/__init__.py (working copy)
614@@ -666,17 +666,22 @@
615         self.validator_list = [self.isNonEmptyFile] + validator_list
616 
617     def isNonEmptyFile(self, field_data, all_data):
618-        try:
619-            content = field_data['content']
620-        except TypeError:
621+        if field_data.has_key('_file_upload_error'):
622+            raise validators.CriticalValidationError, field_data['_file_upload_error']
623+        if not field_data.has_key('filename'):
624             raise validators.CriticalValidationError, gettext("No file was submitted. Check the encoding type on the form.")
625-        if not content:
626+        if not field_data['content-length']:
627             raise validators.CriticalValidationError, gettext("The submitted file is empty.")
628 
629     def render(self, data):
630         return '<input type="file" id="%s" class="v%s" name="%s" />' % \
631             (self.get_id(), self.__class__.__name__, self.field_name)
632 
633+    def prepare(self, new_data):
634+        if new_data.has_key('_file_upload_error'):
635+            # pretend we got something in the field to raise a validation error later
636+            new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] }
637+
638     def html2python(data):
639         if data is None:
640             raise EmptyValue
641Index: django/core/handlers/wsgi.py
642===================================================================
643--- django/core/handlers/wsgi.py        (revision 5126)
644+++ django/core/handlers/wsgi.py        (working copy)
645@@ -75,6 +75,7 @@
646         self.environ = environ
647         self.path = environ['PATH_INFO']
648         self.META = environ
649+        self.META['UPLOAD_PROGRESS_ID'] = self._get_file_progress_id()
650         self.method = environ['REQUEST_METHOD'].upper()
651 
652     def __repr__(self):
653@@ -111,7 +112,14 @@
654             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
655                 header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
656                 header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
657-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
658+                header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '')
659+                header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '')
660+                try:
661+                    self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'], self)
662+                except:
663+                    self._post, self._files = {}, {} # make sure we dont read the input stream again
664+                    raise
665+                self._raw_post_data = None # raw data is not available for streamed multipart messages
666             else:
667                 self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
668         else:
669@@ -167,6 +175,17 @@
670             buf.close()
671             return self._raw_post_data
672 
673+    def _get_file_progress_id(self):
674+        """
675+        Returns the Progress ID of the request,
676+        usually provided if there is a file upload
677+        going on.
678+        Returns ``None`` if no progress ID is specified.
679+        """
680+        return self._get_file_progress_from_args(self.environ,
681+                                                 self.GET,
682+                                                 self.environ.get('QUERY_STRING', ''))
683+
684     GET = property(_get_get, _set_get)
685     POST = property(_get_post, _set_post)
686     COOKIES = property(_get_cookies, _set_cookies)
687Index: django/core/handlers/base.py
688===================================================================
689--- django/core/handlers/base.py        (revision 5126)
690+++ django/core/handlers/base.py        (working copy)
691@@ -5,7 +5,7 @@
692 
693 class BaseHandler(object):
694     def __init__(self):
695-        self._request_middleware = self._view_middleware = self._response_middleware = self._exception_middleware = None
696+        self._upload_middleware = self._request_middleware = self._view_middleware = self._response_middleware = self._exception_middleware = None
697 
698     def load_middleware(self):
699         """
700@@ -19,6 +19,7 @@
701         self._view_middleware = []
702         self._response_middleware = []
703         self._exception_middleware = []
704+        self._upload_middleware = []
705         for middleware_path in settings.MIDDLEWARE_CLASSES:
706             try:
707                 dot = middleware_path.rindex('.')
708@@ -47,13 +48,28 @@
709                 self._response_middleware.insert(0, mw_instance.process_response)
710             if hasattr(mw_instance, 'process_exception'):
711                 self._exception_middleware.insert(0, mw_instance.process_exception)
712+            if hasattr(mw_instance, 'process_upload'):
713+                self._upload_middleware.append(mw_instance.process_upload)
714 
715+    def file_progress_descriptor(self, request):
716+        """
717+        Returns a descriptor that manages the file_progress
718+        """       
719+        for mw_call in self._upload_middleware:
720+            result = mw_call(http.MultiPartParserError)
721+            if result != None:
722+                return result
723+        return http.DefaultFileProgressDescriptor(http.MultiPartParserError)
724+       
725     def get_response(self, request):
726         "Returns an HttpResponse object for the given HttpRequest"
727         from django.core import exceptions, urlresolvers
728         from django.core.mail import mail_admins
729         from django.conf import settings
730 
731+        # Add file_progress descriptor
732+        request._file_progress = self.file_progress_descriptor(request)
733+
734         # Apply request middleware
735         for middleware_method in self._request_middleware:
736             response = middleware_method(request)
737Index: django/core/handlers/modpython.py
738===================================================================
739--- django/core/handlers/modpython.py   (revision 5126)
740+++ django/core/handlers/modpython.py   (working copy)
741@@ -47,7 +47,12 @@
742     def _load_post_and_files(self):
743         "Populates self._post and self._files"
744         if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
745-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
746+            self._raw_post_data = None # raw data is not available for streamed multipart messages
747+            try:
748+                self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req, self)
749+            except:
750+                self._post, self._files = {}, {} # make sure we don't read the input stream again
751+                raise
752         else:
753             self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
754 
755@@ -92,20 +97,21 @@
756                 'AUTH_TYPE':         self._req.ap_auth_type,
757                 'CONTENT_LENGTH':    self._req.clength, # This may be wrong
758                 'CONTENT_TYPE':      self._req.content_type, # This may be wrong
759-                'GATEWAY_INTERFACE': 'CGI/1.1',
760-                'PATH_INFO':         self._req.path_info,
761-                'PATH_TRANSLATED':   None, # Not supported
762-                'QUERY_STRING':      self._req.args,
763-                'REMOTE_ADDR':       self._req.connection.remote_ip,
764-                'REMOTE_HOST':       None, # DNS lookups not supported
765-                'REMOTE_IDENT':      self._req.connection.remote_logname,
766-                'REMOTE_USER':       self._req.user,
767-                'REQUEST_METHOD':    self._req.method,
768-                'SCRIPT_NAME':       None, # Not supported
769-                'SERVER_NAME':       self._req.server.server_hostname,
770-                'SERVER_PORT':       self._req.server.port,
771-                'SERVER_PROTOCOL':   self._req.protocol,
772-                'SERVER_SOFTWARE':   'mod_python'
773+                'GATEWAY_INTERFACE':  'CGI/1.1',
774+                'PATH_INFO':          self._req.path_info,
775+                'PATH_TRANSLATED':    None, # Not supported
776+                'QUERY_STRING':       self._req.args,
777+                'REMOTE_ADDR':        self._req.connection.remote_ip,
778+                'REMOTE_HOST':        None, # DNS lookups not supported
779+                'REMOTE_IDENT':       self._req.connection.remote_logname,
780+                'REMOTE_USER':        self._req.user,
781+                'REQUEST_METHOD':     self._req.method,
782+                'SCRIPT_NAME':        None, # Not supported
783+                'SERVER_NAME':        self._req.server.server_hostname,
784+                'SERVER_PORT':        self._req.server.port,
785+                'SERVER_PROTOCOL':    self._req.protocol,
786+                'UPLOAD_PROGRESS_ID': self._get_file_progress_id(),
787+                'SERVER_SOFTWARE':    'mod_python'
788             }
789             for key, value in self._req.headers_in.items():
790                 key = 'HTTP_' + key.upper().replace('-', '_')
791@@ -122,6 +128,18 @@
792     def _get_method(self):
793         return self.META['REQUEST_METHOD'].upper()
794 
795+    def _get_file_progress_id(self):
796+        """
797+        Returns the upload Progress ID of the request, usually provided
798+        if there is a file upload going on. Delegates the lookup to
799+        _get_file_progress_from_args() with the request headers, GET and args.
800+        Returns ``None`` if no progress ID is specified.
801+        """
802+        return self._get_file_progress_from_args(self._req.headers_in,
803+                                                 self.GET,
804+                                                 self._req.args)
805+
806+
807     GET = property(_get_get, _set_get)
808     POST = property(_get_post, _set_post)
809     COOKIES = property(_get_cookies, _set_cookies)
810Index: django/utils/file.py
811===================================================================
812--- django/utils/file.py        (revision 0)
813+++ django/utils/file.py        (revision 0)
814@@ -0,0 +1,36 @@
815+import os
816+
817+try:
818+    import shutil
819+    file_move = shutil.move
820+except ImportError:
821+    file_move = os.rename
822+
823+def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64):
824+    """
825+    Moves a file from one location to another in the safest way possible.
826+
827+    First, it tries file_move (shutil.move, which copes with a change of
828+    filesystems; os.rename, which does not, if shutil is unavailable). If
829+    that raises EnvironmentError, it streams the file manually in python,
830+    chunk_size bytes at a time, and then removes the original file.
831+    """
832+    try:
833+        file_move(old_file_name, new_file_name)
834+        return
835+    except EnvironmentError: # covers IOError, OSError and shutil.Error
836+        pass
837+
838+    old_file = open(old_file_name, 'rb') # opened first so a missing source never truncates the target
839+    try:
840+        new_file = open(new_file_name, 'wb')
841+        try:
842+            current_chunk = old_file.read(chunk_size)
843+            while current_chunk: # an empty read means end of file
844+                new_file.write(current_chunk)
845+                current_chunk = old_file.read(chunk_size)
846+        finally:
847+            new_file.close()
848+    finally:
849+        old_file.close()
850+    os.remove(old_file_name)
851Index: tests/modeltests/test_client/views.py
852===================================================================
853--- tests/modeltests/test_client/views.py       (revision 5126)
854+++ tests/modeltests/test_client/views.py       (working copy)
855@@ -44,6 +44,12 @@
856 
857     return HttpResponse(t.render(c))
858 
859+def post_file_view(request):
860+    "Renders the name of the file received in a multipart POST"
861+    template = Template('File {{ file.filename }} received', name='POST Template')
862+    uploaded_file = request.FILES['file_file']
863+    return HttpResponse(template.render(Context({'file': uploaded_file})))
864+
865 def redirect_view(request):
866     "A view that redirects all requests to the GET view"
867     return HttpResponseRedirect('/test_client/get_view/')
868Index: tests/modeltests/test_client/models.py
869===================================================================
870--- tests/modeltests/test_client/models.py      (revision 5126)
871+++ tests/modeltests/test_client/models.py      (working copy)
872@@ -75,6 +75,21 @@
873         self.assertEqual(response.template.name, "Book template")
874         self.assertEqual(response.content, "Blink - Malcolm Gladwell")
875 
876+    def test_post_file_view(self):
877+        "POST this python file to a view, with FILE_UPLOAD_DIR unset and set to the temp dir"
878+        import os, tempfile
879+        from django.conf import settings
880+        file = __file__.replace('.pyc', '.py') # always upload the source, not the compiled module
881+        for upload_dir in [None, tempfile.gettempdir()]:
882+            settings.FILE_UPLOAD_DIR = upload_dir # NOTE(review): never restored after the test
883+            post_data = { 'name': file, 'file': open(file) } # NOTE(review): file handle is never closed
884+            response = self.client.post('/test_client/post_file_view/', post_data)
885+            self.failUnless('models.py' in response.context['file']['filename'])
886+            self.failUnless(len(response.context['file']['content']) == os.path.getsize(file))
887+            if upload_dir:
888+                self.failUnless(response.context['file']['tmpfilename'])
889+
890+
891     def test_redirect(self):
892         "GET a URL that redirects elsewhere"
893         response = self.client.get('/test_client/redirect_view/')
894Index: tests/modeltests/test_client/urls.py
895===================================================================
896--- tests/modeltests/test_client/urls.py        (revision 5126)
897+++ tests/modeltests/test_client/urls.py        (working copy)
898@@ -4,6 +4,7 @@
899 urlpatterns = patterns('',
900     (r'^get_view/$', views.get_view),
901     (r'^post_view/$', views.post_view),
902+    (r'^post_file_view/$', views.post_file_view),
903     (r'^raw_post_view/$', views.raw_post_view),
904     (r'^redirect_view/$', views.redirect_view),
905     (r'^form_view/$', views.form_view),
906Index: docs/request_response.txt
907===================================================================
908--- docs/request_response.txt   (revision 5126)
909+++ docs/request_response.txt   (working copy)
910@@ -72,13 +72,25 @@
911 ``FILES``
912     A dictionary-like object containing all uploaded files. Each key in
913     ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
914-    value in ``FILES`` is a standard Python dictionary with the following three
915+    value in ``FILES`` is a standard Python dictionary with the following four
916     keys:
917 
918         * ``filename`` -- The name of the uploaded file, as a Python string.
919         * ``content-type`` -- The content type of the uploaded file.
920         * ``content`` -- The raw content of the uploaded file.
921+        * ``content-length`` -- The length of the content in bytes.
922 
923+    If streaming file uploads are enabled, two additional keys
924+    describing the uploaded file will be present:
925+
926+       * ``tmpfilename`` -- The filename for the temporary file.
927+       * ``tmpfile`` -- An open file object for the temporary file.
928+
929+    The temporary file will be removed when the request finishes.
930+
931+    Note that accessing ``content`` when streaming uploads are enabled
932+    will read the whole file into memory, which may not be what you want.
933+
934     Note that ``FILES`` will only contain data if the request method was POST
935     and the ``<form>`` that posted to the request had
936     ``enctype="multipart/form-data"``. Otherwise, ``FILES`` will be a blank
937Index: docs/settings.txt
938===================================================================
939--- docs/settings.txt   (revision 5126)
940+++ docs/settings.txt   (working copy)
941@@ -439,6 +439,15 @@
942 
943 .. _Testing Django Applications: ../testing/
944 
945+FILE_UPLOAD_DIR
946+---------------
947+
948+Default: ``None``
949+
950+Path to a directory where temporary files should be written during
951+file uploads. Leaving this as ``None`` will disable streaming file uploads,
952+and cause all uploaded files to be stored (temporarily) in memory.
953+
954 IGNORABLE_404_ENDS
955 ------------------
956 
957@@ -782,6 +791,16 @@
958 
959 .. _site framework docs: ../sites/
960 
961+STREAMING_MIN_POST_SIZE
962+-----------------------
963+
964+Default: 524288 (``512*1024``)
965+
966+An integer specifying the minimum number of bytes that has to be
967+received (in a POST) for file upload streaming to take place. Any
968+request smaller than this will be handled in memory.
969+Note: ``FILE_UPLOAD_DIR`` has to be defined to enable streaming.
970+
971 TEMPLATE_CONTEXT_PROCESSORS
972 ---------------------------
973 
974Index: docs/forms.txt
975===================================================================
976--- docs/forms.txt      (revision 5126)
977+++ docs/forms.txt      (working copy)
978@@ -475,6 +475,19 @@
979    new_data = request.POST.copy()
980    new_data.update(request.FILES)
981 
982+Streaming file uploads
983+----------------------
984+
985+File uploads will be read into memory by default. This works fine for
986+small to medium-sized uploads (from 1MB to 100MB depending on your
987+setup and usage). If you want to support larger uploads, you can enable
988+upload streaming, where only a small part of the file will be in memory
989+at any time. To do this, you need to specify the ``FILE_UPLOAD_DIR``
990+setting (see the settings_ document for more details).
991+
992+See `request object`_ for more details about ``request.FILES`` objects
993+with streaming file uploads enabled.
994+
995 Validators
996 ==========
997 
998@@ -697,3 +710,4 @@
999 .. _`generic views`: ../generic_views/
1000 .. _`models API`: ../model-api/
1001 .. _settings: ../settings/
1002+.. _request object: ../request_response/#httprequest-objects