Code

Ticket #2070: 5079-streaming_file_upload_with_safe_file_move.diff

File 5079-streaming_file_upload_with_safe_file_move.diff, 29.1 KB (added by Michael Axiak <axiak@…>, 7 years ago)

Cleaned it up a bit. Moved file_move_safe into django.utils in case it should be used in future endeavors.

Line 
1Index: django/http/__init__.py
2===================================================================
3--- django/http/__init__.py     (revision 5079)
4+++ django/http/__init__.py     (working copy)
5@@ -1,9 +1,14 @@
6-import os
7+import os, pickle
8 from Cookie import SimpleCookie
9 from pprint import pformat
10 from urllib import urlencode, quote
11 from django.utils.datastructures import MultiValueDict
12 
13+try:
14+    from cStringIO import StringIO
15+except ImportError:
16+    from StringIO import StringIO
17+
18 RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
19 
20 try:
21@@ -42,37 +47,316 @@
22     def is_secure(self):
23         return os.environ.get("HTTPS") == "on"
24 
25-def parse_file_upload(header_dict, post_data):
26-    "Returns a tuple of (POST MultiValueDict, FILES MultiValueDict)"
27-    import email, email.Message
28-    from cgi import parse_header
29-    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
30-    raw_message += '\r\n\r\n' + post_data
31-    msg = email.message_from_string(raw_message)
32-    POST = MultiValueDict()
33-    FILES = MultiValueDict()
34-    for submessage in msg.get_payload():
35-        if submessage and isinstance(submessage, email.Message.Message):
36-            name_dict = parse_header(submessage['Content-Disposition'])[1]
37-            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
38-            # or {'name': 'blah'} for POST fields
39-            # We assume all uploaded files have a 'filename' set.
40-            if name_dict.has_key('filename'):
41-                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
42-                if not name_dict['filename'].strip():
43-                    continue
44-                # IE submits the full path, so trim everything but the basename.
45-                # (We can't use os.path.basename because it expects Linux paths.)
46-                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
47-                FILES.appendlist(name_dict['name'], {
48-                    'filename': filename,
49-                    'content-type': (submessage.has_key('Content-Type') and submessage['Content-Type'] or None),
50-                    'content': submessage.get_payload(),
51-                })
52-            else:
53-                POST.appendlist(name_dict['name'], submessage.get_payload())
54-    return POST, FILES
55+def parse_file_upload(headers, input):
56+    "Returns a tuple of (POST MultiValueDict, FILES MultiValueDict); on a parse error POST holds '_file_upload_error' and FILES is empty"
57 
58+    # Files are only streamed to disk if settings.FILE_UPLOAD_DIR is set
59+    from django.conf import settings
60+    file_upload_dir = settings.FILE_UPLOAD_DIR
61+    streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE
62+    try:
63+        parser = MultiPartParser(headers, input, file_upload_dir, streaming_min_post_size)
64+        return parser.parse()
65+    except MultiPartParserError, e:
66+        return MultiValueDict({ '_file_upload_error': [e.message] }), MultiValueDict()
67+
68+class MultiPartParserError(Exception):
69+    "Raised by MultiPartParser for invalid request headers or temporary file failures."
70+    def __init__(self, message):
71+        self.message = message
72+    def __str__(self):
73+        return repr(self.message)
74+class MultiPartParser(object):
75+    """
76+    A rfc2388 multipart/form-data parser.
77+   
78+    parse() reads the input stream in chunk_size chunks and returns a
79+    tuple of (POST MultiValueDict, FILES MultiValueDict). If
80+    file_upload_dir is defined files will be streamed to temporary
81+    files in the specified directory.
82+
83+    The FILES dictionary will have 'filename', 'content-type',
84+    'content' and 'content-length' entries. For streamed files it will
85+    also have 'tmpfilename' and 'tmpfile'. The 'content' entry will
86+    only be read from disk when referenced for streamed files.
87+
88+    If the header X-Progress-ID is sent with a 32 character alphanumeric
89+    string, a temporary file with the same name will be created in
90+    ``file_upload_dir`` containing a pickled { 'received', 'size' }
91+    dictionary with the number of bytes received and the size expected
92+    respectively. The file will be unlinked when the parser finishes.
93+
94+    """
95+
96+    def __init__(self, headers, input, file_upload_dir=None, streaming_min_post_size=None, chunk_size=1024*64):
97+        "Validates the request headers and initialises the parser state. Raises MultiPartParserError for invalid headers."
98+        try:
99+            content_length = int(headers['Content-Length'])
100+        except (KeyError, TypeError, ValueError):
101+            raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length'))
102+
103+        content_type = headers.get('Content-Type')
104+        if not content_type or not content_type.startswith('multipart/'):
105+            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
106+           
107+        ctype, opts = self.parse_header(content_type)
108+        boundary = opts.get('boundary')
109+        from cgi import valid_boundary
110+        if not boundary or not valid_boundary(boundary):
111+            raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary)
112+
113+        # An X-Progress-ID is only honoured when an upload directory is given;
114+        # a malformed id is rejected, a well-formed one names the progress file.
115+        progress_id = headers.get('X-Progress-ID')
116+        self._progress_filename = None
117+        if file_upload_dir and progress_id:
118+            import re
119+            if not re.match(r'^[0-9a-zA-Z]{32}$', progress_id):
120+                raise MultiPartParserError('Invalid X-Progress-ID: %s' % progress_id)
121+            self._progress_filename = os.path.join(file_upload_dir, progress_id)
122+        self._boundary = '--' + boundary
123+        self._input = input
124+        self._size = content_length
125+        self._received = 0
126+        self._file_upload_dir = file_upload_dir
127+        self._chunk_size = chunk_size
128+        self._state = 'PREAMBLE'
129+        self._partial = ''
130+        self._post = MultiValueDict()
131+        self._files = MultiValueDict()
132+
133+        if streaming_min_post_size is not None and content_length < streaming_min_post_size:
134+            self._file_upload_dir = None # disable file streaming for small request
135+
136+        try:
137+            # use mx fast string search if available
138+            from mx.TextTools import FS
139+            self._fs = FS(self._boundary)
140+        except ImportError:
141+            self._fs = None
142+
143+    def parse(self):
144+        "Parses the whole request and returns a (POST, FILES) tuple of MultiValueDicts."
145+        try:
146+            self._parse()
147+        finally: # always try to remove the progress file, even if parsing failed
148+            if self._progress_filename:
149+                try:
150+                    os.unlink(self._progress_filename)
151+                except OSError:
152+                    pass
153+        return self._post, self._files
154+
155+    def _parse(self):
156+        "Feeds the request body to _read() in pieces of at most chunk_size bytes until Content-Length bytes are consumed."
157+        size = self._size
158+        try:
159+            while size > 0:
160+                n = self._read(self._input, min(self._chunk_size, size))
161+                if not n: break # input ended before Content-Length bytes arrived
162+                size -= n
163+        except:
164+            # consume any remaining data so we dont generate a "Connection Reset" error
165+            size = self._size - self._received
166+            while size > 0:
167+                data = self._input.read(min(self._chunk_size, size))
168+                if not data: break # nothing left to drain; avoids looping forever
169+                size -= len(data)
170+            raise
171+
172+    def _find_boundary(self, data, start, stop):
173+        """
174+        Find the next boundary and return the end of current part
175+        and start of next part.
176+        """
177+        if self._fs:
178+            boundary = self._fs.find(data, start, stop)
179+        else:
180+            boundary = data.find(self._boundary, start, stop)
181+        if boundary >= 0:
182+            end = boundary
183+            next = boundary + len(self._boundary)
184+
185+            # backup over CRLF
186+            if end > 0 and data[end-1] == '\n': end -= 1
187+            if end > 0 and data[end-1] == '\r': end -= 1
188+            # skip over --CRLF
189+            if next < stop and data[next] == '-': next += 1
190+            if next < stop and data[next] == '-': next += 1
191+            if next < stop and data[next] == '\r': next += 1
192+            if next < stop and data[next] == '\n': next += 1
193+
194+            return True, end, next
195+        else:
196+            return False, stop, stop
197+
198+    class TemporaryFile(object):
199+        "A temporary file that tries to delete itself when garbage collected."
200+        def __init__(self, dir):
201+            import tempfile
202+            (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
203+            self.file = os.fdopen(fd, 'w+b')
204+            self.name = name
205+
206+        def __getattr__(self, name):
207+            a = getattr(self.__dict__['file'], name)
208+            if type(a) != type(0):
209+                setattr(self, name, a)
210+            return a
211+
212+        def __del__(self):
213+            try:
214+                os.unlink(self.name)
215+            except OSError:
216+                pass
217+           
218+    class LazyContent(dict):
219+        """
220+        A lazy FILES dictionary entry that reads the contents from
221+        tmpfile only when referenced.
222+        """
223+        def __init__(self, data):
224+            dict.__init__(self, data)
225+       
226+        def __getitem__(self, key):
227+            if key == 'content' and not self.has_key(key):
228+                self['tmpfile'].seek(0)
229+                self['content'] = self['tmpfile'].read()
230+            return dict.__getitem__(self, key)
231+
232+    def _read(self, input, size):
233+        "Reads up to size bytes from input and runs them through the parser state machine. Returns the number of bytes read (0 at end of input)."
234+        data = input.read(size)
235+        if not data:
236+            return 0
237+
238+        read_size = len(data)
239+        self._received += read_size
240+
241+        if self._partial:
242+            data = self._partial + data
243+
244+        start = 0
245+        stop = len(data)
246+        while start < stop:
247+            boundary, end, next = self._find_boundary(data, start, stop)
248+
249+            if not boundary and read_size:
250+                # make sure we dont treat a partial boundary (and its separators) as data
251+                stop -= len(self._boundary) + 16
252+                end = next = stop
253+                if end <= start:
254+                    break # need more data
255+
256+            if self._state == 'PREAMBLE':
257+                # Preamble, just ignore it
258+                self._state = 'HEADER'
259+
260+            elif self._state == 'HEADER':
261+                # Beginning of header, look for end of header and parse it if found.
262+
263+                header_end = data.find('\r\n\r\n', start, stop)
264+                if header_end == -1:
265+                    break # need more data
266+
267+                header = data[start:header_end]
268+
269+                self._fieldname = None
270+                self._filename = None
271+                self._content_type = None
272+
273+                for line in header.split('\r\n'):
274+                    ctype, opts = self.parse_header(line)
275+                    if ctype == 'content-disposition: form-data':
276+                        self._fieldname = opts.get('name')
277+                        self._filename = opts.get('filename')
278+                    elif ctype.startswith('content-type: '):
279+                        self._content_type = ctype[14:]
280+
281+                if self._filename is not None:
282+                    # cleanup filename from IE full paths:
283+                    self._filename = self._filename[self._filename.rfind("\\")+1:].strip()
284+
285+                    if self._filename: # ignore files without filenames
286+                        if self._file_upload_dir:
287+                            try:
288+                                self._file = self.TemporaryFile(dir=self._file_upload_dir)
289+                            except:
290+                                raise MultiPartParserError("Failed to create temporary file.")
291+                        else:
292+                            self._file = StringIO()
293+                    else:
294+                        self._file = None
295+                    self._filesize = 0
296+                    self._state = 'FILE'
297+                else:
298+                    self._field = StringIO()
299+                    self._state = 'FIELD'
300+                next = header_end + 4
301+
302+            elif self._state == 'FIELD':
303+                # In a field, collect data until a boundary is found.
304+
305+                self._field.write(data[start:end])
306+                if boundary:
307+                    if self._fieldname:
308+                        self._post.appendlist(self._fieldname, self._field.getvalue())
309+                    self._field.close()
310+                    self._state = 'HEADER'
311+
312+            elif self._state == 'FILE':
313+                # In a file, collect data until a boundary is found.
314+
315+                if self._file:
316+                    try:
317+                        self._file.write(data[start:end])
318+                    except IOError, e:
319+                        raise MultiPartParserError("Failed to write to temporary file.")
320+                    self._filesize += end-start
321+
322+                    if self._progress_filename:
323+                        f = open(self._progress_filename, 'w') # already joined with the upload dir in __init__
324+                        try:
325+                            pickle.dump({ 'received': self._received, 'size': self._size }, f)
326+                        finally:
327+                            f.close()
328+                if boundary:
329+                    if self._file:
330+                        if self._file_upload_dir:
331+                            self._file.seek(0)
332+                            file = self.LazyContent({
333+                                'filename': self._filename,
334+                                'content-type':  self._content_type,
335+                                # 'content': is read on demand
336+                                'content-length': self._filesize,
337+                                'tmpfilename': self._file.name,
338+                                'tmpfile': self._file
339+                            })
340+                        else:
341+                            file = {
342+                                'filename': self._filename,
343+                                'content-type':  self._content_type,
344+                                'content': self._file.getvalue(),
345+                                'content-length': self._filesize
346+                            }
347+                            self._file.close()
348+
349+                        self._files.appendlist(self._fieldname, file)
350+
351+                    self._state = 'HEADER'
352+
353+            start = next
354+               
355+        self._partial = data[start:]
356+
357+        return read_size
358+
359+    def parse_header(self, line):
360+        from cgi import parse_header
361+        return parse_header(line)
362+
363+
364+
365 class QueryDict(MultiValueDict):
366     """A specialized MultiValueDict that takes a query string when initialized.
367     This is immutable unless you create a copy of it."""
368@@ -306,3 +590,4 @@
369     if not host:
370         host = request.META.get('HTTP_HOST', '')
371     return host
372+
373Index: django/conf/global_settings.py
374===================================================================
375--- django/conf/global_settings.py      (revision 5079)
376+++ django/conf/global_settings.py      (working copy)
377@@ -240,6 +240,20 @@
378 # isExistingURL validator.
379 URL_VALIDATOR_USER_AGENT = "Django/0.96pre (http://www.djangoproject.com)"
380 
381+# The directory to place streamed file uploads. The web server needs write
382+# permissions on this directory.
383+# If this is None, streaming uploads are disabled.
384+FILE_UPLOAD_DIR = None
385+
386+
387+# The minimum size of a POST before file uploads are streamed to disk.
388+# Requests smaller than this are kept entirely in memory.
389+# Size is in bytes.
390+STREAMING_MIN_POST_SIZE = 512 * (2**10)
391+
392+
393+
394+
395 ##############
396 # MIDDLEWARE #
397 ##############
398@@ -335,3 +349,5 @@
399 
400 # The list of directories to search for fixtures
401 FIXTURE_DIRS = ()
402+
403+
404Index: django/db/models/base.py
405===================================================================
406--- django/db/models/base.py    (revision 5079)
407+++ django/db/models/base.py    (working copy)
408@@ -12,12 +12,14 @@
409 from django.dispatch import dispatcher
410 from django.utils.datastructures import SortedDict
411 from django.utils.functional import curry
412+from django.utils.file import file_move_safe
413 from django.conf import settings
414 from itertools import izip
415 import types
416 import sys
417 import os
418 
419+               
420 class ModelBase(type):
421     "Metaclass for all models"
422     def __new__(cls, name, bases, attrs):
423@@ -361,7 +363,7 @@
424     def _get_FIELD_size(self, field):
425         return os.path.getsize(self._get_FIELD_filename(field))
426 
427-    def _save_FIELD_file(self, field, filename, raw_contents, save=True):
428+    def _save_FIELD_file(self, field, filename, raw_field, save=True):
429         directory = field.get_directory_name()
430         try: # Create the date-based directory if it doesn't exist.
431             os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
432@@ -383,9 +385,13 @@
433         setattr(self, field.attname, filename)
434 
435         full_filename = self._get_FIELD_filename(field)
436-        fp = open(full_filename, 'wb')
437-        fp.write(raw_contents)
438-        fp.close()
439+        if raw_field.has_key('tmpfilename'):
440+            raw_field['tmpfile'].close()
441+            file_move_safe(raw_field['tmpfilename'], full_filename)
442+        else:
443+            fp = open(full_filename, 'wb')
444+            fp.write(raw_field['content'])
445+            fp.close()
446 
447         # Save the width and/or height, if applicable.
448         if isinstance(field, ImageField) and (field.width_field or field.height_field):
449Index: django/db/models/fields/__init__.py
450===================================================================
451--- django/db/models/fields/__init__.py (revision 5079)
452+++ django/db/models/fields/__init__.py (working copy)
453@@ -636,7 +636,7 @@
454         setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
455         setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
456         setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
457-        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
458+        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
459         dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
460 
461     def delete_file(self, instance):
462@@ -659,9 +659,9 @@
463         if new_data.get(upload_field_name, False):
464             func = getattr(new_object, 'save_%s_file' % self.name)
465             if rel:
466-                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
467+                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save)
468             else:
469-                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
470+                func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save)
471 
472     def get_directory_name(self):
473         return os.path.normpath(datetime.datetime.now().strftime(self.upload_to))
474Index: django/oldforms/__init__.py
475===================================================================
476--- django/oldforms/__init__.py (revision 5079)
477+++ django/oldforms/__init__.py (working copy)
478@@ -666,17 +666,22 @@
479         self.validator_list = [self.isNonEmptyFile] + validator_list
480 
481     def isNonEmptyFile(self, field_data, all_data):
482-        try:
483-            content = field_data['content']
484-        except TypeError:
485+        if hasattr(field_data, 'has_key') and field_data.has_key('_file_upload_error'):
486+            raise validators.CriticalValidationError, field_data['_file_upload_error']
487+        if not hasattr(field_data, 'has_key') or not field_data.has_key('filename'): # non-dict data (presumably a non-multipart form post) counts as "no file"
488             raise validators.CriticalValidationError, gettext("No file was submitted. Check the encoding type on the form.")
489-        if not content:
490+        if not field_data['content-length']:
491             raise validators.CriticalValidationError, gettext("The submitted file is empty.")
492 
493     def render(self, data):
494         return '<input type="file" id="%s" class="v%s" name="%s" />' % \
495             (self.get_id(), self.__class__.__name__, self.field_name)
496 
497+    def prepare(self, new_data):
498+        if new_data.has_key('_file_upload_error'):
499+            # pretend we got something in the field to raise a validation error later
500+            new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] }
501+
502     def html2python(data):
503         if data is None:
504             raise EmptyValue
505Index: django/core/handlers/wsgi.py
506===================================================================
507--- django/core/handlers/wsgi.py        (revision 5079)
508+++ django/core/handlers/wsgi.py        (working copy)
509@@ -111,7 +111,14 @@
510             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
511                 header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
512                 header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
513-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
514+                header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '')
515+                header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '')
516+                try:
517+                    self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'])
518+                except:
519+                    self._post, self._files = {}, {} # make sure we dont read the input stream again
520+                    raise
521+                self._raw_post_data = None # raw data is not available for streamed multipart messages
522             else:
523                 self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
524         else:
525Index: django/core/handlers/modpython.py
526===================================================================
527--- django/core/handlers/modpython.py   (revision 5079)
528+++ django/core/handlers/modpython.py   (working copy)
529@@ -47,7 +47,12 @@
530     def _load_post_and_files(self):
531         "Populates self._post and self._files"
532         if self._req.headers_in.has_key('content-type') and self._req.headers_in['content-type'].startswith('multipart'):
533-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
534+            self._raw_post_data = None # raw data is not available for streamed multipart messages
535+            try:
536+                self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req)
537+            except:
538+                self._post, self._files = {}, {} # make sure we dont read the input stream again
539+                raise
540         else:
541             self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
542 
543Index: django/utils/file.py
544===================================================================
545--- django/utils/file.py        (revision 0)
546+++ django/utils/file.py        (revision 0)
547@@ -0,0 +1,36 @@
548+import os
549+
550+try:
551+    import shutil
552+    file_move = shutil.move
553+except ImportError:
554+    file_move = os.rename
555+
556+def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64):
557+    """
558+    Moves a file from one location to another in the safest way possible.
559+   
560+    First, it tries shutil.move, which is OS-dependent but doesn't break
561+    with change of filesystems (os.rename is used if shutil is missing).
562+    If that fails, it streams the file manually from one file to another
563+    in chunks of chunk_size bytes and then removes the original.
564+    """
565+    try:
566+        file_move(old_file_name, new_file_name)
567+        return
568+    except EnvironmentError:
569+        pass
570+
571+    old_file = open(old_file_name, 'rb')
572+    try:
573+        new_file = open(new_file_name, 'wb')
574+        try:
575+            current_chunk = old_file.read(chunk_size)
576+            while current_chunk:
577+                new_file.write(current_chunk)
578+                current_chunk = old_file.read(chunk_size)
579+        finally:
580+            new_file.close()
581+    finally:
582+        old_file.close()
583+    os.remove(old_file_name)
584Index: tests/modeltests/test_client/views.py
585===================================================================
586--- tests/modeltests/test_client/views.py       (revision 5079)
587+++ tests/modeltests/test_client/views.py       (working copy)
588@@ -44,6 +44,12 @@
589 
590     return HttpResponse(t.render(c))
591 
592+def post_file_view(request):
593+    "A view that expects a multipart post and returns a file in the context"
594+    t = Template('File {{ file.filename }} received', name='POST Template')
595+    c = Context({'file': request.FILES['file_file']})
596+    return HttpResponse(t.render(c))
597+
598 def redirect_view(request):
599     "A view that redirects all requests to the GET view"
600     return HttpResponseRedirect('/test_client/get_view/')
601Index: tests/modeltests/test_client/models.py
602===================================================================
603--- tests/modeltests/test_client/models.py      (revision 5079)
604+++ tests/modeltests/test_client/models.py      (working copy)
605@@ -75,6 +75,21 @@
606         self.assertEqual(response.template.name, "Book template")
607         self.assertEqual(response.content, "Blink - Malcolm Gladwell")
608 
609+    def test_post_file_view(self):
610+        "POST this python file to a view"
611+        import os, tempfile
612+        from django.conf import settings
613+        file = __file__.replace('.pyc', '.py')
614+        for upload_dir in [None, tempfile.gettempdir()]:
615+            settings.FILE_UPLOAD_DIR = upload_dir
616+            post_data = { 'name': file, 'file': open(file) }
617+            response = self.client.post('/test_client/post_file_view/', post_data)
618+            self.failUnless('models.py' in response.context['file']['filename'])
619+            self.failUnless(len(response.context['file']['content']) == os.path.getsize(file))
620+            if upload_dir:
621+                self.failUnless(response.context['file']['tmpfilename'])
622+
623+
624     def test_redirect(self):
625         "GET a URL that redirects elsewhere"
626         response = self.client.get('/test_client/redirect_view/')
627Index: tests/modeltests/test_client/urls.py
628===================================================================
629--- tests/modeltests/test_client/urls.py        (revision 5079)
630+++ tests/modeltests/test_client/urls.py        (working copy)
631@@ -4,6 +4,7 @@
632 urlpatterns = patterns('',
633     (r'^get_view/$', views.get_view),
634     (r'^post_view/$', views.post_view),
635+    (r'^post_file_view/$', views.post_file_view),
636     (r'^raw_post_view/$', views.raw_post_view),
637     (r'^redirect_view/$', views.redirect_view),
638     (r'^form_view/$', views.form_view),
639Index: docs/request_response.txt
640===================================================================
641--- docs/request_response.txt   (revision 5079)
642+++ docs/request_response.txt   (working copy)
643@@ -72,13 +72,25 @@
644 ``FILES``
645     A dictionary-like object containing all uploaded files. Each key in
646     ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
647-    value in ``FILES`` is a standard Python dictionary with the following three
648+    value in ``FILES`` is a standard Python dictionary with the following four
649     keys:
650 
651         * ``filename`` -- The name of the uploaded file, as a Python string.
652         * ``content-type`` -- The content type of the uploaded file.
653         * ``content`` -- The raw content of the uploaded file.
654+        * ``content-length`` -- The length of the content in bytes.
655 
656+    If streaming file uploads are enabled two additional keys
657+    describing the uploaded file will be present:
658+
659+       * ``tmpfilename`` -- The filename for the temporary file.
660+       * ``tmpfile`` -- An open file object for the temporary file.
661+
662+    The temporary file will be removed when the request finishes.
663+
664+    Note that accessing ``content`` when streaming uploads are enabled
665+    will read the whole file into memory which may not be what you want.
666+
667     Note that ``FILES`` will only contain data if the request method was POST
668     and the ``<form>`` that posted to the request had
669     ``enctype="multipart/form-data"``. Otherwise, ``FILES`` will be a blank
670Index: docs/settings.txt
671===================================================================
672--- docs/settings.txt   (revision 5079)
673+++ docs/settings.txt   (working copy)
674@@ -437,6 +437,15 @@
675 
676 .. _Testing Django Applications: ../testing/
677 
678+FILE_UPLOAD_DIR
679+---------------
680+
681+Default: ``None``
682+
683+Path to a directory where temporary files should be written during
684+file uploads. Leaving this as ``None`` will disable streaming file uploads,
685+and cause all uploaded files to be stored (temporarily) in memory.
686+
687 IGNORABLE_404_ENDS
688 ------------------
689 
690@@ -774,6 +783,16 @@
691 
692 .. _site framework docs: ../sites/
693 
694+STREAMING_MIN_POST_SIZE
695+-----------------------
696+
697+Default: 524288 (``512*1024``)
698+
699+An integer specifying the minimum number of bytes that has to be
700+received (in a POST) for file upload streaming to take place. Any
701+request smaller than this will be handled in memory.
702+Note: ``FILE_UPLOAD_DIR`` has to be defined to enable streaming.
703+
704 TEMPLATE_CONTEXT_PROCESSORS
705 ---------------------------
706 
707Index: docs/forms.txt
708===================================================================
709--- docs/forms.txt      (revision 5079)
710+++ docs/forms.txt      (working copy)
711@@ -475,6 +475,19 @@
712    new_data = request.POST.copy()
713    new_data.update(request.FILES)
714 
715+Streaming file uploads.
716+-----------------------
717+
718+File uploads will be read into memory by default. This works fine for
719+small to medium sized uploads (from 1MB to 100MB depending on your
720+setup and usage). If you want to support larger uploads you can enable
721+upload streaming where only a small part of the file will be in memory
722+at any time. To do this you need to specify the ``FILE_UPLOAD_DIR``
723+setting (see the settings_ document for more details).
724+
725+See `request object`_ for more details about ``request.FILES`` objects
726+with streaming file uploads enabled.
727+
728 Validators
729 ==========
730 
731@@ -693,3 +706,4 @@
732 .. _`generic views`: ../generic_views/
733 .. _`models API`: ../model-api/
734 .. _settings: ../settings/
735+.. _request object: ../request_response/#httprequest-objects