Code

Ticket #2070: 5099_patch_for_streaming_uploads.diff

File 5099_patch_for_streaming_uploads.diff, 30.8 KB (added by Michael Axiak <axiak@…>, 7 years ago)

Uses multiple mechanisms for determining the progress id.

Line 
1Index: django/http/__init__.py
2===================================================================
3--- django/http/__init__.py     (revision 5099)
4+++ django/http/__init__.py     (working copy)
5@@ -1,11 +1,18 @@
6-import os
7+import os, pickle
8 from Cookie import SimpleCookie
9 from pprint import pformat
10 from urllib import urlencode, quote
11 from django.utils.datastructures import MultiValueDict
12+import re
13 
14+try:
15+    from cStringIO import StringIO
16+except ImportError:
17+    from StringIO import StringIO
18+
19 RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
20 
21+
22 try:
23     # The mod_python version is more efficient, so try importing it first.
24     from mod_python.util import parse_qsl
25@@ -17,6 +24,10 @@
26 
27 class HttpRequest(object):
28     "A basic HTTP request"
29+
30+    upload_id_re = re.compile(r'^[a-fA-F0-9]{32}$') # anchored: the whole id must be 32 hex digits
31+
32+
33     def __init__(self):
34         self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {}
35         self.path = ''
36@@ -42,37 +53,312 @@
37     def is_secure(self):
38         return os.environ.get("HTTPS") == "on"
39 
40-def parse_file_upload(header_dict, post_data):
41-    "Returns a tuple of (POST MultiValueDict, FILES MultiValueDict)"
42-    import email, email.Message
43-    from cgi import parse_header
44-    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
45-    raw_message += '\r\n\r\n' + post_data
46-    msg = email.message_from_string(raw_message)
47-    POST = MultiValueDict()
48-    FILES = MultiValueDict()
49-    for submessage in msg.get_payload():
50-        if submessage and isinstance(submessage, email.Message.Message):
51-            name_dict = parse_header(submessage['Content-Disposition'])[1]
52-            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
53-            # or {'name': 'blah'} for POST fields
54-            # We assume all uploaded files have a 'filename' set.
55-            if 'filename' in name_dict:
56-                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
57-                if not name_dict['filename'].strip():
58-                    continue
59-                # IE submits the full path, so trim everything but the basename.
60-                # (We can't use os.path.basename because it expects Linux paths.)
61-                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
62-                FILES.appendlist(name_dict['name'], {
63-                    'filename': filename,
64-                    'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
65-                    'content': submessage.get_payload(),
66-                })
67-            else:
68-                POST.appendlist(name_dict['name'], submessage.get_payload())
69-    return POST, FILES
70+def parse_file_upload(headers, input, progress_id = None):
71+    from django.conf import settings
72 
73+    # Only stream files to disk if FILE_UPLOAD_DIR is set
74+    file_upload_dir = settings.FILE_UPLOAD_DIR
75+    streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE
76+    # Parser errors are not raised; they are returned as a '_file_upload_error' POST entry.
77+    try:
78+        parser = MultiPartParser(headers, input, progress_id, file_upload_dir, streaming_min_post_size)
79+        return parser.parse()
80+    except MultiPartParserError, e:
81+        return MultiValueDict({ '_file_upload_error': [e.message] }), {}
82+
83+class MultiPartParserError(Exception):
84+    def __init__(self, message):
85+        self.message = message
86+    def __str__(self):
87+        return repr(self.message)
88+       
89+class MultiPartParser(object):
90+    """
91+    A rfc2388 multipart/form-data parser.
92+   
93+    parse() reads the input stream in chunk_size chunks and returns a
94+    tuple of (POST MultiValueDict, FILES MultiValueDict). If
95+    file_upload_dir is defined files will be streamed to temporary
96+    files in the specified directory.
97+
98+    The FILES dictionary will have 'filename', 'content-type',
99+    'content' and 'content-length' entries. For streamed files it will
100+    also have 'tmpfilename' and 'tmpfile'. The 'content' entry will
101+    only be read from disk when referenced for streamed files.
102+
103+    If the header X-Progress-ID is sent with a 32 character hex string
104+    a temporary file with the same name will be created in
105+    ``file_upload_dir`` with a pickled { 'received', 'size' }
106+    dictionary with the number of bytes received and the size expected
107+    respectively. The file will be unlinked when the parser finishes.
108+
109+    """
110+
111+    def __init__(self, headers, input, progress_id=None, file_upload_dir=None, streaming_min_post_size=None, chunk_size=1024*64):
112+        try:
113+            content_length = int(headers['Content-Length'])
114+        except (KeyError, TypeError, ValueError):
115+            raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length'))
116+
117+        content_type = headers.get('Content-Type')
118+
119+        if not content_type or not content_type.startswith('multipart/'):
120+            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
121+           
122+        ctype, opts = self.parse_header(content_type)
123+        boundary = opts.get('boundary')
124+        from cgi import valid_boundary
125+        if not boundary or not valid_boundary(boundary):
126+            raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary)
127+
128+        # validate the progress id before using it as a file name inside file_upload_dir
129+        if file_upload_dir and progress_id:
130+            import re
131+            if not re.match(r'^[0-9a-zA-Z]{32}$', progress_id):
132+                raise MultiPartParserError('Invalid X-Progress-ID: %s' % progress_id)
133+            self._progress_filename = os.path.join(file_upload_dir, progress_id)
134+        else:
135+            self._progress_filename = None
136+        self._boundary = '--' + boundary
137+        self._input = input
138+        self._size = content_length
139+        self._received = 0
140+        self._file_upload_dir = file_upload_dir
141+        self._chunk_size = chunk_size
142+        self._state = 'PREAMBLE'
143+        self._partial = ''
144+        self._post = MultiValueDict()
145+        self._files = MultiValueDict()
146+        if streaming_min_post_size is not None and content_length < streaming_min_post_size:
147+            self._file_upload_dir = None # disable file streaming for small request
148+
149+        try:
150+            # use mx fast string search if available
151+            from mx.TextTools import FS
152+            self._fs = FS(self._boundary)
153+        except ImportError:
154+            self._fs = None
155+
156+    def parse(self):
157+        try:
158+            self._parse()
159+        finally:
160+            if self._progress_filename:
161+                try:
162+                    os.unlink(self._progress_filename)
163+                except OSError:
164+                    pass
165+       
166+        return self._post, self._files
167+
168+    def _parse(self):
169+        size = self._size
170+        # feed the body to _read() chunk by chunk until Content-Length bytes were seen
171+        try:
172+            while size > 0:
173+                n = self._read(self._input, min(self._chunk_size, size))
174+                if not n:
175+                    break
176+                size -= n
177+        except:
178+            # consume any remaining data so we don't generate a "Connection Reset" error
179+            size = self._size - self._received
180+            while size > 0:
181+                data = self._input.read(min(self._chunk_size, size))
182+                size -= len(data) or size # an empty read means EOF: zero the counter to stop
183+            raise
184+
185+    def _find_boundary(self, data, start, stop):
186+        """
187+        Find the next boundary and return the end of current part
188+        and start of next part.
189+        """
190+        if self._fs:
191+            boundary = self._fs.find(data, start, stop)
192+        else:
193+            boundary = data.find(self._boundary, start, stop)
194+        if boundary >= 0:
195+            end = boundary
196+            next = boundary + len(self._boundary)
197+
198+            # backup over CRLF
199+            if end > 0 and data[end-1] == '\n': end -= 1
200+            if end > 0 and data[end-1] == '\r': end -= 1
201+            # skip over --CRLF
202+            if next < stop and data[next] == '-': next += 1
203+            if next < stop and data[next] == '-': next += 1
204+            if next < stop and data[next] == '\r': next += 1
205+            if next < stop and data[next] == '\n': next += 1
206+
207+            return True, end, next
208+        else:
209+            return False, stop, stop
210+
211+    class TemporaryFile(object):
212+        "A temporary file that tries to delete itself when garbage collected."
213+        def __init__(self, dir):
214+            import tempfile
215+            (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
216+            self.file = os.fdopen(fd, 'w+b')
217+            self.name = name
218+
219+        def __getattr__(self, name):
220+            a = getattr(self.__dict__['file'], name)
221+            if type(a) != type(0):
222+                setattr(self, name, a)
223+            return a
224+
225+        def __del__(self):
226+            try:
227+                os.unlink(self.name)
228+            except OSError:
229+                pass
230+           
231+    class LazyContent(dict):
232+        """
233+        A lazy FILES dictionary entry that reads the contents from
234+        tmpfile only when referenced.
235+        """
236+        def __init__(self, data):
237+            dict.__init__(self, data)
238+       
239+        def __getitem__(self, key):
240+            if key == 'content' and not self.has_key(key):
241+                self['tmpfile'].seek(0)
242+                self['content'] = self['tmpfile'].read()
243+            return dict.__getitem__(self, key)
244+
245+    def _read(self, input, size):
246+        data = input.read(size)
247+        # one chunk goes through the PREAMBLE/HEADER/FIELD/FILE state machine below
248+        if not data:
249+            return 0
250+
251+        read_size = len(data)
252+        self._received += read_size
253+
254+        if self._partial:
255+            data = self._partial + data
256+
257+        start = 0
258+        stop = len(data)
259+       
260+        while start < stop:
261+            boundary, end, next = self._find_boundary(data, start, stop)
262+
263+            if not boundary and read_size:
264+                # make sure we don't treat a partial boundary (and its separators) as data
265+                stop -= len(self._boundary) + 16
266+                end = next = stop
267+                if end <= start:
268+                    break # need more data
269+
270+            if self._state == 'PREAMBLE':
271+                # Preamble, just ignore it
272+                self._state = 'HEADER'
273+
274+            elif self._state == 'HEADER':
275+                # Beginning of header, look for end of header and parse it if found.
276+
277+                header_end = data.find('\r\n\r\n', start, stop)
278+                if header_end == -1:
279+                    break # need more data
280+
281+                header = data[start:header_end]
282+
283+                self._fieldname = None
284+                self._filename = None
285+                self._content_type = None
286+                # header names are case-insensitive; not every cgi.parse_header lowercases them
287+                for line in header.split('\r\n'):
288+                    ctype, opts = self.parse_header(line)
289+                    if ctype.lower() == 'content-disposition: form-data':
290+                        self._fieldname = opts.get('name')
291+                        self._filename = opts.get('filename')
292+                    elif ctype.lower().startswith('content-type: '):
293+                        self._content_type = ctype[14:]
294+
295+                if self._filename is not None:
296+                    # cleanup filename from IE full paths:
297+                    self._filename = self._filename[self._filename.rfind("\\")+1:].strip()
298+
299+                    if self._filename: # ignore files without filenames
300+                        if self._file_upload_dir:
301+                            try:
302+                                self._file = self.TemporaryFile(dir=self._file_upload_dir)
303+                            except:
304+                                raise MultiPartParserError("Failed to create temporary file.")
305+                        else:
306+                            self._file = StringIO()
307+                    else:
308+                        self._file = None
309+                    self._filesize = 0
310+                    self._state = 'FILE'
311+                else:
312+                    self._field = StringIO()
313+                    self._state = 'FIELD'
314+                next = header_end + 4
315+
316+            elif self._state == 'FIELD':
317+                # In a field, collect data until a boundary is found.
318+
319+                self._field.write(data[start:end])
320+                if boundary:
321+                    if self._fieldname:
322+                        self._post.appendlist(self._fieldname, self._field.getvalue())
323+                    self._field.close()
324+                    self._state = 'HEADER'
325+
326+            elif self._state == 'FILE':
327+                # In a file, collect data until a boundary is found.
328+
329+                if self._file:
330+                    try:
331+                        self._file.write(data[start:end])
332+                    except IOError:
333+                        raise MultiPartParserError("Failed to write to temporary file.")
334+                    self._filesize += end-start
335+                    # _progress_filename is already a full path (joined in __init__), the one parse() unlinks
336+                    if self._progress_filename:
337+                        f = open(self._progress_filename, 'w')
338+                        pickle.dump({ 'received': self._received, 'size': self._size }, f)
339+                        f.close()
340+
341+                if boundary:
342+                    if self._file:
343+                        if self._file_upload_dir:
344+                            self._file.seek(0)
345+                            file = self.LazyContent({
346+                                'filename': self._filename,
347+                                'content-type':  self._content_type,
348+                                # 'content': is read on demand
349+                                'content-length': self._filesize,
350+                                'tmpfilename': self._file.name,
351+                                'tmpfile': self._file
352+                            })
353+                        else:
354+                            file = {
355+                                'filename': self._filename,
356+                                'content-type':  self._content_type,
357+                                'content': self._file.getvalue(),
358+                                'content-length': self._filesize
359+                            }
360+                            self._file.close()
361+
362+                        self._files.appendlist(self._fieldname, file)
363+
364+                    self._state = 'HEADER'
365+
366+            start = next
367+               
368+        self._partial = data[start:]
369+
370+        return read_size
371+
372+    def parse_header(self, line):
373+        from cgi import parse_header
374+        return parse_header(line)
375+
376 class QueryDict(MultiValueDict):
377     """A specialized MultiValueDict that takes a query string when initialized.
378     This is immutable unless you create a copy of it."""
379@@ -306,3 +592,4 @@
380     if not host:
381         host = request.META.get('HTTP_HOST', '')
382     return host
383+
384Index: django/conf/global_settings.py
385===================================================================
386--- django/conf/global_settings.py      (revision 5099)
387+++ django/conf/global_settings.py      (working copy)
388@@ -240,6 +240,20 @@
389 # isExistingURL validator.
390 URL_VALIDATOR_USER_AGENT = "Django/0.96pre (http://www.djangoproject.com)"
391 
392+# The directory to place streamed file uploads. The web server needs write
393+# permissions on this directory.
394+# If this is None, streaming uploads are disabled.
395+FILE_UPLOAD_DIR = None
396+
397+
398+# The minimum size of a POST request, in bytes, before file uploads are
399+# streamed to disk. Requests smaller than this are kept in memory.
400+# Has no effect while FILE_UPLOAD_DIR is None.
401+STREAMING_MIN_POST_SIZE = 512 * (2**10)
402+
403+
404+
405+
406 ##############
407 # MIDDLEWARE #
408 ##############
409@@ -335,3 +349,5 @@
410 
411 # The list of directories to search for fixtures
412 FIXTURE_DIRS = ()
413+
414+
415Index: django/db/models/base.py
416===================================================================
417--- django/db/models/base.py    (revision 5099)
418+++ django/db/models/base.py    (working copy)
419@@ -12,12 +12,14 @@
420 from django.dispatch import dispatcher
421 from django.utils.datastructures import SortedDict
422 from django.utils.functional import curry
423+from django.utils.file import file_move_safe
424 from django.conf import settings
425 from itertools import izip
426 import types
427 import sys
428 import os
429 
430+               
431 class ModelBase(type):
432     "Metaclass for all models"
433     def __new__(cls, name, bases, attrs):
434@@ -361,7 +363,7 @@
435     def _get_FIELD_size(self, field):
436         return os.path.getsize(self._get_FIELD_filename(field))
437 
438-    def _save_FIELD_file(self, field, filename, raw_contents, save=True):
439+    def _save_FIELD_file(self, field, filename, raw_field, save=True):
440         directory = field.get_directory_name()
441         try: # Create the date-based directory if it doesn't exist.
442             os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
443@@ -383,9 +385,13 @@
444         setattr(self, field.attname, filename)
445 
446         full_filename = self._get_FIELD_filename(field)
447-        fp = open(full_filename, 'wb')
448-        fp.write(raw_contents)
449-        fp.close()
450+        if raw_field.has_key('tmpfilename'):
451+            raw_field['tmpfile'].close()
452+            file_move_safe(raw_field['tmpfilename'], full_filename)
453+        else:
454+            fp = open(full_filename, 'wb')
455+            fp.write(raw_field['content'])
456+            fp.close()
457 
458         # Save the width and/or height, if applicable.
459         if isinstance(field, ImageField) and (field.width_field or field.height_field):
460Index: django/db/models/fields/__init__.py
461===================================================================
462--- django/db/models/fields/__init__.py (revision 5099)
463+++ django/db/models/fields/__init__.py (working copy)
464@@ -636,7 +636,7 @@
465         setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
466         setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
467         setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
468-        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
469+        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
470         dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
471 
472     def delete_file(self, instance):
473@@ -659,9 +659,9 @@
474         if new_data.get(upload_field_name, False):
475             func = getattr(new_object, 'save_%s_file' % self.name)
476             if rel:
477-                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
478+                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save)
479             else:
480-                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
481+                func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save)
482 
483     def get_directory_name(self):
484         return os.path.normpath(datetime.datetime.now().strftime(self.upload_to))
485Index: django/oldforms/__init__.py
486===================================================================
487--- django/oldforms/__init__.py (revision 5099)
488+++ django/oldforms/__init__.py (working copy)
489@@ -666,17 +666,22 @@
490         self.validator_list = [self.isNonEmptyFile] + validator_list
491 
492     def isNonEmptyFile(self, field_data, all_data):
493-        try:
494-            content = field_data['content']
495-        except TypeError:
496+        if isinstance(field_data, dict) and field_data.has_key('_file_upload_error'):
497+            raise validators.CriticalValidationError, field_data['_file_upload_error']
498+        if not isinstance(field_data, dict) or not field_data.has_key('filename'): # non-dict data: form posted without multipart enctype
499             raise validators.CriticalValidationError, gettext("No file was submitted. Check the encoding type on the form.")
500-        if not content:
501+        if not field_data['content-length']:
502             raise validators.CriticalValidationError, gettext("The submitted file is empty.")
503 
504     def render(self, data):
505         return '<input type="file" id="%s" class="v%s" name="%s" />' % \
506             (self.get_id(), self.__class__.__name__, self.field_name)
507 
508+    def prepare(self, new_data):
509+        if new_data.has_key('_file_upload_error'):
510+            # pretend we got something in the field to raise a validation error later
511+            new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] }
512+
513     def html2python(data):
514         if data is None:
515             raise EmptyValue
516Index: django/core/handlers/wsgi.py
517===================================================================
518--- django/core/handlers/wsgi.py        (revision 5099)
519+++ django/core/handlers/wsgi.py        (working copy)
520@@ -111,7 +111,14 @@
521             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
522                 header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
523                 header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
524-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
525+                header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '')
526+                header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '')
527+                try:
528+                    self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'], self.progress_id)
529+                except:
530+                    self._post, self._files = {}, {} # make sure we dont read the input stream again
531+                    raise
532+                self._raw_post_data = None # raw data is not available for streamed multipart messages
533             else:
534                 self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
535         else:
536@@ -167,6 +174,36 @@
537             buf.close()
538             return self._raw_post_data
539 
540+    def _get_file_progress_id(self):
541+        """
542+        Returns the upload progress ID of the request, taken (in order) from
543+        the X-Upload-ID or X-Progress-ID header, the ``upload_id`` or
544+        ``progress_id`` GET parameter, or a 32 character query string.
545+        Returns ``None`` if none is given or it fails ``upload_id_re``.
546+        """
547+        _get = self.GET
548+        _querystring = self.environ.get('QUERY_STRING', '')
549+
550+        if 'HTTP_X_UPLOAD_ID' in self.environ:
551+            progress_id = self.environ['HTTP_X_UPLOAD_ID']
552+        elif 'HTTP_X_PROGRESS_ID' in self.environ:
553+            progress_id = self.environ['HTTP_X_PROGRESS_ID']
554+        elif 'upload_id' in _get:
555+            progress_id = _get['upload_id']
556+        elif 'progress_id' in _get:
557+            progress_id = _get['progress_id']
558+        elif len(_querystring) == 32:
559+            progress_id = _querystring
560+        else:
561+            return None
562+
563+        if not self.upload_id_re.match(progress_id):
564+            return None
565+
566+        return progress_id
567+
568+    progress_id = property(_get_file_progress_id)
569+
570     GET = property(_get_get, _set_get)
571     POST = property(_get_post, _set_post)
572     COOKIES = property(_get_cookies, _set_cookies)
573Index: django/core/handlers/modpython.py
574===================================================================
575--- django/core/handlers/modpython.py   (revision 5099)
576+++ django/core/handlers/modpython.py   (working copy)
577@@ -47,7 +47,12 @@
578     def _load_post_and_files(self):
579         "Populates self._post and self._files"
580         if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
581-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
582+            self._raw_post_data = None # raw data is not available for streamed multipart messages
583+            try:
584+                self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req, self.progress_id)
585+            except:
586+                self._post, self._files = {}, {} # make sure we dont read the input stream again
587+                raise
588         else:
589             self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
590 
591@@ -122,6 +127,35 @@
592     def _get_method(self):
593         return self.META['REQUEST_METHOD'].upper()
594 
595+    def _get_file_progress_id(self):
596+        """
597+        Returns the upload progress ID of the request, taken (in order) from
598+        the X-Upload-ID or X-Progress-ID header, the ``upload_id`` or
599+        ``progress_id`` GET parameter, or a 32 character query string.
600+        Returns ``None`` if none is given or it fails ``upload_id_re``.
601+        """
602+        _get = self.GET
603+
604+        if 'X-Upload-ID' in self._req.headers_in:
605+            progress_id = self._req.headers_in['X-Upload-ID']
606+        elif 'X-Progress-ID' in self._req.headers_in:
607+            progress_id = self._req.headers_in['X-Progress-ID']
608+        elif 'upload_id' in _get:
609+            progress_id = _get['upload_id']
610+        elif 'progress_id' in _get:
611+            progress_id = _get['progress_id']
612+        elif self._req.args is not None and len(self._req.args.strip()) == 32:
613+            progress_id = self._req.args.strip() # use the same stripped value that was measured
614+        else:
615+            return None
616+
617+        if not self.upload_id_re.match(progress_id):
618+            return None
619+
620+        return progress_id
621+
622+    progress_id = property(_get_file_progress_id)
623+
624     GET = property(_get_get, _set_get)
625     POST = property(_get_post, _set_post)
626     COOKIES = property(_get_cookies, _set_cookies)
627Index: tests/modeltests/test_client/views.py
628===================================================================
629--- tests/modeltests/test_client/views.py       (revision 5099)
630+++ tests/modeltests/test_client/views.py       (working copy)
631@@ -44,6 +44,12 @@
632 
633     return HttpResponse(t.render(c))
634 
635+def post_file_view(request):
636+    "A view that expects a multipart post and returns a file in the context"
637+    t = Template('File {{ file.filename }} received', name='POST Template')
638+    c = Context({'file': request.FILES['file_file']})
639+    return HttpResponse(t.render(c))
640+
641 def redirect_view(request):
642     "A view that redirects all requests to the GET view"
643     return HttpResponseRedirect('/test_client/get_view/')
644Index: tests/modeltests/test_client/models.py
645===================================================================
646--- tests/modeltests/test_client/models.py      (revision 5099)
647+++ tests/modeltests/test_client/models.py      (working copy)
648@@ -75,6 +75,21 @@
649         self.assertEqual(response.template.name, "Book template")
650         self.assertEqual(response.content, "Blink - Malcolm Gladwell")
651 
652+    def test_post_file_view(self):
653+        "POST this python file to a view, both in memory and streamed to disk"
654+        import os, tempfile
655+        from django.conf import settings
656+        file = __file__.replace('.pyc', '.py')
657+        for upload_dir in [None, tempfile.gettempdir()]:
658+            # threshold 0: otherwise this small upload is never streamed and 'tmpfilename' is missing
659+            settings.FILE_UPLOAD_DIR, settings.STREAMING_MIN_POST_SIZE = upload_dir, 0
660+            post_data = { 'name': file, 'file': open(file, 'rb') }
661+            response = self.client.post('/test_client/post_file_view/', post_data)
662+            self.failUnless('models.py' in response.context['file']['filename'])
663+            self.failUnless(len(response.context['file']['content']) == os.path.getsize(file))
664+            if upload_dir: self.failUnless(response.context['file']['tmpfilename'])
666+
667     def test_redirect(self):
668         "GET a URL that redirects elsewhere"
669         response = self.client.get('/test_client/redirect_view/')
670Index: tests/modeltests/test_client/urls.py
671===================================================================
672--- tests/modeltests/test_client/urls.py        (revision 5099)
673+++ tests/modeltests/test_client/urls.py        (working copy)
674@@ -4,6 +4,7 @@
675 urlpatterns = patterns('',
676     (r'^get_view/$', views.get_view),
677     (r'^post_view/$', views.post_view),
678+    (r'^post_file_view/$', views.post_file_view),
679     (r'^raw_post_view/$', views.raw_post_view),
680     (r'^redirect_view/$', views.redirect_view),
681     (r'^form_view/$', views.form_view),
682Index: docs/request_response.txt
683===================================================================
684--- docs/request_response.txt   (revision 5099)
685+++ docs/request_response.txt   (working copy)
686@@ -72,13 +72,25 @@
687 ``FILES``
688     A dictionary-like object containing all uploaded files. Each key in
689     ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
690-    value in ``FILES`` is a standard Python dictionary with the following three
691+    value in ``FILES`` is a standard Python dictionary with the following four
692     keys:
693 
694         * ``filename`` -- The name of the uploaded file, as a Python string.
695         * ``content-type`` -- The content type of the uploaded file.
696         * ``content`` -- The raw content of the uploaded file.
697+        * ``content-length`` -- The length of the content in bytes.
698 
699+    If streaming file uploads are enabled two additional keys
700+    describing the uploaded file will be present:
701+
702+       * ``tmpfilename`` -- The filename for the temporary file.
703+       * ``tmpfile`` -- An open file object for the temporary file.
704+
705+    The temporary file will be removed when the request finishes.
706+
707+    Note that accessing ``content`` when streaming uploads are enabled
708+    will read the whole file into memory which may not be what you want.
709+
710     Note that ``FILES`` will only contain data if the request method was POST
711     and the ``<form>`` that posted to the request had
712     ``enctype="multipart/form-data"``. Otherwise, ``FILES`` will be a blank
713Index: docs/settings.txt
714===================================================================
715--- docs/settings.txt   (revision 5099)
716+++ docs/settings.txt   (working copy)
717@@ -437,6 +437,15 @@
718 
719 .. _Testing Django Applications: ../testing/
720 
721+FILE_UPLOAD_DIR
722+---------------
723+
724+Default: ``None``
725+
726+Path to a directory where temporary files should be written during
727+file uploads. Leaving this as ``None`` will disable streaming file uploads,
728+and cause all uploaded files to be stored (temporarily) in memory.
729+
730 IGNORABLE_404_ENDS
731 ------------------
732 
733@@ -780,6 +789,16 @@
734 
735 .. _site framework docs: ../sites/
736 
737+STREAMING_MIN_POST_SIZE
738+-----------------------
739+
740+Default: 524288 (``512*1024``)
741+
742+An integer specifying the minimum size in bytes of a POST request
743+(as given by its ``Content-Length``) for file upload streaming to take
744+place. Any request smaller than this will be handled in memory.
745+Note: ``FILE_UPLOAD_DIR`` has to be defined to enable streaming.
746+
747 TEMPLATE_CONTEXT_PROCESSORS
748 ---------------------------
749 
750Index: docs/forms.txt
751===================================================================
752--- docs/forms.txt      (revision 5099)
753+++ docs/forms.txt      (working copy)
754@@ -475,6 +475,19 @@
755    new_data = request.POST.copy()
756    new_data.update(request.FILES)
757 
758+Streaming file uploads.
759+-----------------------
760+
761+File uploads will be read into memory by default. This works fine for
762+small to medium sized uploads (from 1MB to 100MB depending on your
763+setup and usage). If you want to support larger uploads you can enable
764+upload streaming where only a small part of the file will be in memory
765+at any time. To do this you need to specify the ``FILE_UPLOAD_DIR``
766+setting (see the settings_ document for more details).
767+
768+See `request object`_ for more details about ``request.FILES`` objects
769+with streaming file uploads enabled.
770+
771 Validators
772 ==========
773 
774@@ -693,3 +706,4 @@
775 .. _`generic views`: ../generic_views/
776 .. _`models API`: ../model-api/
777 .. _settings: ../settings/
778+.. _request object: ../request_response/#httprequest-objects