Code

Ticket #2070: 5343_cleaned_streaming_file_upload_tweaked2.diff

File 5343_cleaned_streaming_file_upload_tweaked2.diff, 37.3 KB (added by klaus.blindert@…, 7 years ago)

Fixed microscopic bug in an error path

Line 
1Index: django/http/multipartparser.py
2===================================================================
3--- django/http/multipartparser.py      (revision 0)
4+++ django/http/multipartparser.py      (revision 0)
5@@ -0,0 +1,325 @@
6+"""
7+MultiPart parsing for file uploads.
8+If both a progress id is sent (either through ``X-Progress-ID``
9+header or ``progress_id`` GET) and ``FILE_UPLOAD_DIR`` is set
10+in the settings, then the file progress will be tracked using
11+``request.file_progress``.
12+
13+To use this feature, consider creating a middleware with an appropriate
14+``process_request``::
15+
16+    class FileProgressTrack(object):
17+        def __get__(self, request, HttpRequest):
18+            progress_id = request.META['UPLOAD_PROGRESS_ID']
19+            status = # get progress from progress_id here
20+
21+            return status
22+
23+        def __set__(self, request, new_value):
24+            progress_id = request.META['UPLOAD_PROGRESS_ID']
25+
26+            # set the progress using progress_id here.
27+
28+    # example middleware
29+    class FileProgressExample(object):
30+        def process_request(self, request):
31+            request.__class__.file_progress = FileProgressTrack()
32+
33+
34+
35+"""
36+
37+__all__ = ['MultiPartParserError','MultiPartParser']
38+
39+
40+from django.utils.datastructures import MultiValueDict
41+import os
42+
43+try:
44+    from cStringIO import StringIO
45+except ImportError:
46+    from StringIO import StringIO
47+
48+
49+class MultiPartParserError(Exception):
50+    def __init__(self, message):
51+        self.message = message
52+    def __str__(self):
53+        return repr(self.message)
54+
55+class MultiPartParser(object):
56+    """
57+    An RFC 2388 multipart/form-data parser.
58+   
59+    parse() reads the input stream in chunk_size chunks and returns a
60+    tuple of (POST MultiValueDict, FILES MultiValueDict). If
61+    file_upload_dir is defined files will be streamed to temporary
62+    files in the specified directory.
63+
64+    The FILES dictionary will have 'filename', 'content-type',
65+    'content' and 'content-length' entries. For streamed files it will
66+    also have 'tmpfilename' and 'tmpfile'. The 'content' entry will
67+    only be read from disk when referenced for streamed files.
68+
69+    If the X-Progress-ID is sent (in one of many formats), then
70+    object.file_progress will be given a dictionary of the progress.
71+    """
72+    def __init__(self, headers, input, request, file_upload_dir=None, streaming_min_post_size=None, chunk_size=1024*64):
73+        try:
74+            content_length = int(headers['Content-Length'])
75+        except:
76+            raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length'))
77+
78+        content_type = headers.get('Content-Type')
79+
80+        if not content_type or not content_type.startswith('multipart/'):
81+            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
82+           
83+        ctype, opts = self.parse_header(content_type)
84+        boundary = opts.get('boundary')
85+        from cgi import valid_boundary
86+        if not boundary or not valid_boundary(boundary):
87+            raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary)
88+
89+        progress_id = request.META['UPLOAD_PROGRESS_ID']
90+
91+        self._track_progress = file_upload_dir and progress_id # whether or not to track progress
92+        self._boundary = '--' + boundary
93+        self._input = input
94+        self._size = content_length
95+        self._received = 0
96+        self._file_upload_dir = file_upload_dir
97+        self._chunk_size = chunk_size
98+        self._state = 'PREAMBLE'
99+        self._partial = ''
100+        self._post = MultiValueDict()
101+        self._files = MultiValueDict()
102+        self._request = request
103+
104+        if streaming_min_post_size is not None and content_length < streaming_min_post_size:
105+            self._file_upload_dir = None # disable file streaming for small request
106+        elif self._track_progress:
107+            request.file_progress = {'state': 'starting'}
108+
109+        try:
110+            # Use mx fast string search if available.
111+            from mx.TextTools import FS
112+            self._fs = FS(self._boundary)
113+        except ImportError:
114+            self._fs = None
115+
116+    def parse(self):
117+        try:
118+            self._parse()
119+        finally:
120+            if self._track_progress:
121+                self._request.file_progress = {'state': 'done'}
122+        return self._post, self._files
123+
124+    def _parse(self):
125+        size = self._size
126+
127+        try:
128+            while size > 0:
129+                n = self._read(self._input, min(self._chunk_size, size))
130+                if not n:
131+                    break
132+                size -= n
133+        except:
134+            # consume any remaining data so we don't generate a "Connection Reset" error
135+            size = self._size - self._received
136+            while size > 0:
137+                data = self._input.read(min(self._chunk_size, size))
138+                size -= len(data)
139+            raise
140+
141+    def _find_boundary(self, data, start, stop):
142+        """
143+        Find the next boundary and return the end of current part
144+        and start of next part.
145+        """
146+        if self._fs:
147+            boundary = self._fs.find(data, start, stop)
148+        else:
149+            boundary = data.find(self._boundary, start, stop)
150+        if boundary >= 0:
151+            end = boundary
152+            next = boundary + len(self._boundary)
153+
154+            # backup over CRLF
155+            if end > 0 and data[end-1] == '\n': end -= 1
156+            if end > 0 and data[end-1] == '\r': end -= 1
157+            # skip over --CRLF
158+            if next < stop and data[next] == '-': next += 1
159+            if next < stop and data[next] == '-': next += 1
160+            if next < stop and data[next] == '\r': next += 1
161+            if next < stop and data[next] == '\n': next += 1
162+
163+            return True, end, next
164+        else:
165+            return False, stop, stop
166+
167+    class TemporaryFile(object):
168+        "A temporary file that tries to delete itself when garbage collected."
169+        def __init__(self, dir):
170+            import tempfile
171+            (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
172+            self.file = os.fdopen(fd, 'w+b')
173+            self.name = name
174+
175+        def __getattr__(self, name):
176+            a = getattr(self.__dict__['file'], name)
177+            if type(a) != type(0):
178+                setattr(self, name, a)
179+            return a
180+
181+        def __del__(self):
182+            try:
183+                os.unlink(self.name)
184+            except OSError:
185+                pass
186+
187+    class LazyContent(dict):
188+        """
189+        A lazy FILES dictionary entry that reads the contents from
190+        tmpfile only when referenced.
191+        """
192+        def __init__(self, data):
193+            dict.__init__(self, data)
194+
195+        def __getitem__(self, key):
196+            if key == 'content' and not self.has_key(key):
197+                self['tmpfile'].seek(0)
198+                self['content'] = self['tmpfile'].read()
199+            return dict.__getitem__(self, key)
200+
201+    def _read(self, input, size):
202+        data = input.read(size)
203+
204+        if not data:
205+            return 0
206+
207+        read_size = len(data)
208+        self._received += read_size
209+
210+        if self._partial:
211+            data = self._partial + data
212+
213+        start = 0
214+        stop = len(data)
215+
216+        while start < stop:
217+            boundary, end, next = self._find_boundary(data, start, stop)
218+
219+            if not boundary and read_size:
220+                # make sure we don't treat a partial boundary (and its separators) as data
221+                stop -= len(self._boundary) + 16
222+                end = next = stop
223+                if end <= start:
224+                    break # need more data
225+
226+            if self._state == 'PREAMBLE':
227+                # Preamble, just ignore it
228+                self._state = 'HEADER'
229+
230+            elif self._state == 'HEADER':
231+                # Beginning of header, look for end of header and parse it if found.
232+
233+                header_end = data.find('\r\n\r\n', start, stop)
234+                if header_end == -1:
235+                    break # need more data
236+
237+                header = data[start:header_end]
238+
239+                self._fieldname = None
240+                self._filename = None
241+                self._content_type = None
242+
243+                for line in header.split('\r\n'):
244+                    ctype, opts = self.parse_header(line)
245+                    if ctype == 'content-disposition: form-data':
246+                        self._fieldname = opts.get('name')
247+                        self._filename = opts.get('filename')
248+                    elif ctype.startswith('content-type: '):
249+                        self._content_type = ctype[14:]
250+
251+                if self._filename is not None:
252+                    # cleanup filename from IE full paths:
253+                    self._filename = self._filename[self._filename.rfind("\\")+1:].strip()
254+
255+                    if self._filename: # ignore files without filenames
256+                        if self._file_upload_dir:
257+                            try:
258+                                self._file = self.TemporaryFile(dir=self._file_upload_dir)
259+                            except (OSError, IOError), e:
260+                                raise MultiPartParserError("Failed to create temporary file. Error was %s" % e)
261+                        else:
262+                            self._file = StringIO()
263+                    else:
264+                        self._file = None
265+                    self._filesize = 0
266+                    self._state = 'FILE'
267+                else:
268+                    self._field = StringIO()
269+                    self._state = 'FIELD'
270+                next = header_end + 4
271+
272+            elif self._state == 'FIELD':
273+                # In a field, collect data until a boundary is found.
274+
275+                self._field.write(data[start:end])
276+                if boundary:
277+                    if self._fieldname:
278+                        self._post.appendlist(self._fieldname, self._field.getvalue())
279+                    self._field.close()
280+                    self._state = 'HEADER'
281+
282+            elif self._state == 'FILE':
283+                # In a file, collect data until a boundary is found.
284+
285+                if self._file:
286+                    try:
287+                        self._file.write(data[start:end])
288+                    except IOError, e:
289+                        raise MultiPartParserError("Failed to write to temporary file.")
290+                    self._filesize += end-start
291+
292+                    if self._track_progress:
293+                        self._request.file_progress = {'received': self._received,
294+                                                       'size':     self._size,
295+                                                       'state':    'uploading'}
296+
297+                if boundary:
298+                    if self._file:
299+                        if self._file_upload_dir:
300+                            self._file.seek(0)
301+                            file = self.LazyContent({
302+                                'filename': self._filename,
303+                                'content-type':  self._content_type,
304+                                # 'content': is read on demand
305+                                'content-length': self._filesize,
306+                                'tmpfilename': self._file.name,
307+                                'tmpfile': self._file
308+                            })
309+                        else:
310+                            file = {
311+                                'filename': self._filename,
312+                                'content-type':  self._content_type,
313+                                'content': self._file.getvalue(),
314+                                'content-length': self._filesize
315+                            }
316+                            self._file.close()
317+
318+                        self._files.appendlist(self._fieldname, file)
319+
320+                    self._state = 'HEADER'
321+
322+            start = next
323+
324+        self._partial = data[start:]
325+
326+        return read_size
327+
328+    def parse_header(self, line):
329+        from cgi import parse_header
330+        return parse_header(line)
331Index: django/http/__init__.py
332===================================================================
333--- django/http/__init__.py     (revision 5343)
334+++ django/http/__init__.py     (working copy)
335@@ -3,7 +3,11 @@
336 from pprint import pformat
337 from urllib import urlencode, quote
338 from django.utils.datastructures import MultiValueDict
339+from django.http.multipartparser import MultiPartParser, MultiPartParserError
340+import re
341 
342+upload_id_re = re.compile(r'^[a-fA-F0-9]{32}$') # file progress id Regular expression
343+
344 RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
345 
346 try:
347@@ -42,37 +46,55 @@
348     def is_secure(self):
349         return os.environ.get("HTTPS") == "on"
350 
351-def parse_file_upload(header_dict, post_data):
352-    "Returns a tuple of (POST MultiValueDict, FILES MultiValueDict)"
353-    import email, email.Message
354-    from cgi import parse_header
355-    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
356-    raw_message += '\r\n\r\n' + post_data
357-    msg = email.message_from_string(raw_message)
358-    POST = MultiValueDict()
359-    FILES = MultiValueDict()
360-    for submessage in msg.get_payload():
361-        if submessage and isinstance(submessage, email.Message.Message):
362-            name_dict = parse_header(submessage['Content-Disposition'])[1]
363-            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
364-            # or {'name': 'blah'} for POST fields
365-            # We assume all uploaded files have a 'filename' set.
366-            if 'filename' in name_dict:
367-                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
368-                if not name_dict['filename'].strip():
369-                    continue
370-                # IE submits the full path, so trim everything but the basename.
371-                # (We can't use os.path.basename because it expects Linux paths.)
372-                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
373-                FILES.appendlist(name_dict['name'], {
374-                    'filename': filename,
375-                    'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
376-                    'content': submessage.get_payload(),
377-                })
378-            else:
379-                POST.appendlist(name_dict['name'], submessage.get_payload())
380-    return POST, FILES
381+    def _get_file_progress(self):
382+        return {}
383 
384+    def _set_file_progress(self,value):
385+        pass
386+
387+    def _del_file_progress(self):
388+        pass
389+
390+    file_progress = property(_get_file_progress,
391+                             _set_file_progress,
392+                             _del_file_progress)
393+
394+    def _get_file_progress_from_args(self, headers, get, querystring):
395+        """
396+        This parses the request for a file progress_id value.
397+        Note that there are two distinct ways of getting the progress
398+        ID -- header and GET. One is used primarily to attach via JavaScript
399+        to the end of an HTML form action while the other is used for AJAX
400+        communication.
401+
402+        All progress IDs must be valid 32-digit hexadecimal numbers.
403+        """
404+        if 'X-Upload-ID' in headers:
405+            progress_id = headers['X-Upload-ID']
406+        elif 'progress_id' in get:
407+            progress_id = get['progress_id']
408+        else:
409+            return None
410+
411+        if not upload_id_re.match(progress_id):
412+            return None
413+
414+        return progress_id
415+
416+def parse_file_upload(headers, input, request):
417+    from django.conf import settings
418+
419+    # Only stream files to disk if FILE_UPLOAD_DIR is set
420+    file_upload_dir = settings.FILE_UPLOAD_DIR
421+    streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE
422+
423+    try:
424+        parser = MultiPartParser(headers, input, request, file_upload_dir, streaming_min_post_size)
425+        return parser.parse()
426+    except MultiPartParserError, e:
427+        return MultiValueDict({ '_file_upload_error': [e.message] }), {}
428+
429+
430 class QueryDict(MultiValueDict):
431     """A specialized MultiValueDict that takes a query string when initialized.
432     This is immutable unless you create a copy of it."""
433Index: django/oldforms/__init__.py
434===================================================================
435--- django/oldforms/__init__.py (revision 5343)
436+++ django/oldforms/__init__.py (working copy)
437@@ -666,17 +666,22 @@
438         self.validator_list = [self.isNonEmptyFile] + validator_list
439 
440     def isNonEmptyFile(self, field_data, all_data):
441-        try:
442-            content = field_data['content']
443-        except TypeError:
444+        if field_data.has_key('_file_upload_error'):
445+            raise validators.CriticalValidationError, field_data['_file_upload_error']
446+        if not field_data.has_key('filename'):
447             raise validators.CriticalValidationError, gettext("No file was submitted. Check the encoding type on the form.")
448-        if not content:
449+        if not field_data['content-length']:
450             raise validators.CriticalValidationError, gettext("The submitted file is empty.")
451 
452     def render(self, data):
453         return '<input type="file" id="%s" class="v%s" name="%s" />' % \
454             (self.get_id(), self.__class__.__name__, self.field_name)
455 
456+    def prepare(self, new_data):
457+        if new_data.has_key('_file_upload_error'):
458+            # pretend we got something in the field to raise a validation error later
459+            new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] }
460+
461     def html2python(data):
462         if data is None:
463             raise EmptyValue
464Index: django/db/models/base.py
465===================================================================
466--- django/db/models/base.py    (revision 5343)
467+++ django/db/models/base.py    (working copy)
468@@ -12,6 +12,7 @@
469 from django.dispatch import dispatcher
470 from django.utils.datastructures import SortedDict
471 from django.utils.functional import curry
472+from django.utils.file import file_move_safe
473 from django.conf import settings
474 from itertools import izip
475 import types
476@@ -361,12 +362,16 @@
477     def _get_FIELD_size(self, field):
478         return os.path.getsize(self._get_FIELD_filename(field))
479 
480-    def _save_FIELD_file(self, field, filename, raw_contents, save=True):
481+    def _save_FIELD_file(self, field, filename, raw_field, save=True):
482         directory = field.get_directory_name()
483         try: # Create the date-based directory if it doesn't exist.
484             os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
485         except OSError: # Directory probably already exists.
486             pass
487+
488+        if filename is None:
489+            filename = raw_field['filename']
490+
491         filename = field.get_filename(filename)
492 
493         # If the filename already exists, keep adding an underscore to the name of
494@@ -383,9 +388,16 @@
495         setattr(self, field.attname, filename)
496 
497         full_filename = self._get_FIELD_filename(field)
498-        fp = open(full_filename, 'wb')
499-        fp.write(raw_contents)
500-        fp.close()
501+        if raw_field.has_key('tmpfilename'):
502+            raw_field['tmpfile'].close()
503+            file_move_safe(raw_field['tmpfilename'], full_filename)
504+        else:
505+            from django.utils import file_locks
506+            fp = open(full_filename, 'wb')
507+            # exclusive lock
508+            file_locks.lock(fp, file_locks.LOCK_EX)
509+            fp.write(raw_field['content'])
510+            fp.close()
511 
512         # Save the width and/or height, if applicable.
513         if isinstance(field, ImageField) and (field.width_field or field.height_field):
514Index: django/db/models/fields/__init__.py
515===================================================================
516--- django/db/models/fields/__init__.py (revision 5343)
517+++ django/db/models/fields/__init__.py (working copy)
518@@ -701,7 +701,8 @@
519         setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
520         setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
521         setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
522-        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
523+        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
524+        setattr(cls, 'move_%s_file' % self.name, lambda instance, raw_field, save=True: instance._save_FIELD_file(self, None, raw_field, save))       
525         dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
526 
527     def delete_file(self, instance):
528@@ -724,9 +725,9 @@
529         if new_data.get(upload_field_name, False):
530             func = getattr(new_object, 'save_%s_file' % self.name)
531             if rel:
532-                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
533+                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save)
534             else:
535-                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
536+                func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save)
537 
538     def get_directory_name(self):
539         return os.path.normpath(datetime.datetime.now().strftime(self.upload_to))
540Index: django/conf/global_settings.py
541===================================================================
542--- django/conf/global_settings.py      (revision 5343)
543+++ django/conf/global_settings.py      (working copy)
544@@ -242,6 +242,16 @@
545 # isExistingURL validator.
546 URL_VALIDATOR_USER_AGENT = "Django/0.96pre (http://www.djangoproject.com)"
547 
548+# The directory to place streamed file uploads. The web server needs write
549+# permissions on this directory.
550+# If this is None, streaming uploads are disabled.
551+FILE_UPLOAD_DIR = None
552+
553+# The minimum size of a POST before file uploads are streamed to disk.
554+# Any less than this number, and the file is uploaded to memory.
555+# Size is in bytes.
556+STREAMING_MIN_POST_SIZE = 512 * (2**10)
557+
558 ##############
559 # MIDDLEWARE #
560 ##############
561Index: django/core/handlers/wsgi.py
562===================================================================
563--- django/core/handlers/wsgi.py        (revision 5343)
564+++ django/core/handlers/wsgi.py        (working copy)
565@@ -75,6 +75,7 @@
566         self.environ = environ
567         self.path = environ['PATH_INFO']
568         self.META = environ
569+        self.META['UPLOAD_PROGRESS_ID'] = self._get_file_progress_id()
570         self.method = environ['REQUEST_METHOD'].upper()
571 
572     def __repr__(self):
573@@ -111,7 +112,14 @@
574             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
575                 header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
576                 header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
577-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
578+                header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '')
579+                header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '')
580+                try:
581+                    self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'], self)
582+                except:
583+                    self._post, self._files = {}, {} # make sure we dont read the input stream again
584+                    raise
585+                self._raw_post_data = None # raw data is not available for streamed multipart messages
586             else:
587                 self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
588         else:
589@@ -167,6 +175,17 @@
590             buf.close()
591             return self._raw_post_data
592 
593+    def _get_file_progress_id(self):
594+        """
595+        Returns the Progress ID of the request,
596+        usually provided if there is a file upload
597+        going on.
598+        Returns ``None`` if no progress ID is specified.
599+        """
600+        return self._get_file_progress_from_args(self.environ,
601+                                                 self.GET,
602+                                                 self.environ.get('QUERY_STRING', ''))
603+
604     GET = property(_get_get, _set_get)
605     POST = property(_get_post, _set_post)
606     COOKIES = property(_get_cookies, _set_cookies)
607Index: django/core/handlers/modpython.py
608===================================================================
609--- django/core/handlers/modpython.py   (revision 5343)
610+++ django/core/handlers/modpython.py   (working copy)
611@@ -47,7 +47,12 @@
612     def _load_post_and_files(self):
613         "Populates self._post and self._files"
614         if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
615-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
616+            self._raw_post_data = None # raw data is not available for streamed multipart messages
617+            try:
618+                self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req, self)
619+            except:
620+                self._post, self._files = {}, {} # make sure we dont read the input stream again
621+                raise
622         else:
623             self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
624 
625@@ -92,20 +97,21 @@
626                 'AUTH_TYPE':         self._req.ap_auth_type,
627                 'CONTENT_LENGTH':    self._req.clength, # This may be wrong
628                 'CONTENT_TYPE':      self._req.content_type, # This may be wrong
629-                'GATEWAY_INTERFACE': 'CGI/1.1',
630-                'PATH_INFO':         self._req.path_info,
631-                'PATH_TRANSLATED':   None, # Not supported
632-                'QUERY_STRING':      self._req.args,
633-                'REMOTE_ADDR':       self._req.connection.remote_ip,
634-                'REMOTE_HOST':       None, # DNS lookups not supported
635-                'REMOTE_IDENT':      self._req.connection.remote_logname,
636-                'REMOTE_USER':       self._req.user,
637-                'REQUEST_METHOD':    self._req.method,
638-                'SCRIPT_NAME':       None, # Not supported
639-                'SERVER_NAME':       self._req.server.server_hostname,
640-                'SERVER_PORT':       self._req.server.port,
641-                'SERVER_PROTOCOL':   self._req.protocol,
642-                'SERVER_SOFTWARE':   'mod_python'
643+                'GATEWAY_INTERFACE':  'CGI/1.1',
644+                'PATH_INFO':          self._req.path_info,
645+                'PATH_TRANSLATED':    None, # Not supported
646+                'QUERY_STRING':       self._req.args,
647+                'REMOTE_ADDR':        self._req.connection.remote_ip,
648+                'REMOTE_HOST':        None, # DNS lookups not supported
649+                'REMOTE_IDENT':       self._req.connection.remote_logname,
650+                'REMOTE_USER':        self._req.user,
651+                'REQUEST_METHOD':     self._req.method,
652+                'SCRIPT_NAME':        None, # Not supported
653+                'SERVER_NAME':        self._req.server.server_hostname,
654+                'SERVER_PORT':        self._req.server.port,
655+                'SERVER_PROTOCOL':    self._req.protocol,
656+                'UPLOAD_PROGRESS_ID': self._get_file_progress_id(),
657+                'SERVER_SOFTWARE':    'mod_python'
658             }
659             for key, value in self._req.headers_in.items():
660                 key = 'HTTP_' + key.upper().replace('-', '_')
661@@ -122,6 +128,17 @@
662     def _get_method(self):
663         return self.META['REQUEST_METHOD'].upper()
664 
665+    def _get_file_progress_id(self):
666+        """
667+        Returns the Progress ID of the request,
668+        usually provided if there is a file upload
669+        going on.
670+        Returns ``None`` if no progress ID is specified.
671+        """
672+        return self._get_file_progress_from_args(self._req.headers_in,
673+                                                 self.GET,
674+                                                 self._req.args)
675+
676     GET = property(_get_get, _set_get)
677     POST = property(_get_post, _set_post)
678     COOKIES = property(_get_cookies, _set_cookies)
679Index: django/utils/file_locks.py
680===================================================================
681--- django/utils/file_locks.py  (revision 0)
682+++ django/utils/file_locks.py  (revision 0)
683@@ -0,0 +1,50 @@
684+"""
685+Locking portability by Jonathan Feinberg <jdf@pobox.com> in the Python Cookbook
686+
687+Example Usage::
688+
689+    from django.utils import file_locks
690+
691+    f = open('./file', 'wb')
692+
693+    file_locks.lock(f, file_locks.LOCK_EX)
694+    f.write('Django')
695+    f.close()
696+"""
697+
698+
699+import os
700+
701+__all__ = ['LOCK_EX','LOCK_SH','LOCK_NB','lock','unlock']
702+
703+if os.name == 'nt':
704+       import win32con
705+       import win32file
706+       import pywintypes
707+       LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
708+       LOCK_SH = 0
709+       LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
710+       __overlapped = pywintypes.OVERLAPPED()
711+elif os.name == 'posix':
712+       import fcntl
713+       LOCK_EX = fcntl.LOCK_EX
714+       LOCK_SH = fcntl.LOCK_SH
715+       LOCK_NB = fcntl.LOCK_NB
716+else:
717+       raise RuntimeError("Locking only defined for nt and posix platforms")
718+
719+if os.name == 'nt':
720+       def lock(file, flags):
721+               hfile = win32file._get_osfhandle(file.fileno())
722+               win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped)
723+
724+       def unlock(file):
725+               hfile = win32file._get_osfhandle(file.fileno())
726+               win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped)
727+
728+elif os.name =='posix':
729+       def lock(file, flags):
730+               fcntl.flock(file.fileno(), flags)
731+
732+       def unlock(file):
733+               fcntl.flock(file.fileno(), fcntl.LOCK_UN)
734Index: django/utils/file.py
735===================================================================
736--- django/utils/file.py        (revision 0)
737+++ django/utils/file.py        (revision 0)
738@@ -0,0 +1,53 @@
739+import os
740+
741+__all__ = ['file_move_safe']
742+
743+try:
744+    import shutil
745+    file_move = shutil.move
746+except ImportError:
747+    file_move = os.rename
748+
749+def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64, allow_overwrite=False):
750+    """
751+    Moves a file from one location to another in the safest way possible.
752+   
753+    First, it tries ``shutil.move`` (or ``os.rename`` if ``shutil`` cannot
754+    be imported; ``os.rename`` breaks when the move crosses filesystems).
755+    If that raises an ``OSError``, it streams the file manually from one
756+    location to the other in Python.
757+
758+    Without ``allow_overwrite``, if the destination file exists, an
759+    ``IOError`` is raised.
760+    """
761+
762+    from django.utils import file_locks
763+
764+    if old_file_name == new_file_name:
765+        # No file moving takes place.
766+        return
767+
768+    if not allow_overwrite and os.path.exists(new_file_name):
769+        raise IOError, "Django does not allow overwriting files."
770+
771+    try:
772+        file_move(old_file_name, new_file_name)
773+        return
774+    except OSError: # moving to another filesystem
775+        pass
776+
777+    new_file = open(new_file_name, 'wb')
778+    # exclusive lock
779+    file_locks.lock(new_file, file_locks.LOCK_EX)
780+    old_file = open(old_file_name, 'rb')
781+    current_chunk = None
782+
783+    while current_chunk != '':
784+        current_chunk = old_file.read(chunk_size)
785+        new_file.write(current_chunk)
786+
787+    new_file.close()
788+    old_file.close()
789+
790+    os.remove(old_file_name)
791+
792Index: tests/modeltests/test_client/views.py
793===================================================================
794--- tests/modeltests/test_client/views.py       (revision 5343)
795+++ tests/modeltests/test_client/views.py       (working copy)
796@@ -46,6 +46,12 @@
797 
798     return HttpResponse(t.render(c))
799 
800+def post_file_view(request):
801+    "A view that expects a multipart post and returns a file in the context"
802+    t = Template('File {{ file.filename }} received', name='POST Template')
803+    c = Context({'file': request.FILES['file_file']})
804+    return HttpResponse(t.render(c))
805+
806 def redirect_view(request):
807     "A view that redirects all requests to the GET view"
808     return HttpResponseRedirect('/test_client/get_view/')
809Index: tests/modeltests/test_client/models.py
810===================================================================
811--- tests/modeltests/test_client/models.py      (revision 5343)
812+++ tests/modeltests/test_client/models.py      (working copy)
813@@ -3,7 +3,7 @@
814 
815 The test client is a class that can act like a simple
816 browser for testing purposes.
817
818+
819 It allows the user to compose GET and POST requests, and
820 obtain the response that the server gave to those requests.
821 The server Response objects are annotated with the details
822@@ -76,6 +76,20 @@
823         self.assertEqual(response.template.name, "Book template")
824         self.assertEqual(response.content, "Blink - Malcolm Gladwell")
825 
826+    def test_post_file_view(self):
827+        "POST this python file to a view"
828+        import os, tempfile
829+        from django.conf import settings
830+        file = __file__.replace('.pyc', '.py')
831+        for upload_dir in [None, tempfile.gettempdir()]:
832+            settings.FILE_UPLOAD_DIR = upload_dir
833+            post_data = { 'name': file, 'file': open(file) }
834+            response = self.client.post('/test_client/post_file_view/', post_data)
835+            self.failUnless('models.py' in response.context['file']['filename'])
836+            self.failUnless(len(response.context['file']['content']) == os.path.getsize(file))
837+            if upload_dir:
838+                self.failUnless(response.context['file']['tmpfilename'])
839+
840     def test_redirect(self):
841         "GET a URL that redirects elsewhere"
842         response = self.client.get('/test_client/redirect_view/')
843Index: tests/modeltests/test_client/urls.py
844===================================================================
845--- tests/modeltests/test_client/urls.py        (revision 5343)
846+++ tests/modeltests/test_client/urls.py        (working copy)
847@@ -5,6 +5,7 @@
848 urlpatterns = patterns('',
849     (r'^get_view/$', views.get_view),
850     (r'^post_view/$', views.post_view),
851+    (r'^post_file_view/$', views.post_file_view),
852     (r'^raw_post_view/$', views.raw_post_view),
853     (r'^redirect_view/$', views.redirect_view),
854     (r'^permanent_redirect_view/$', redirect_to, { 'url': '/test_client/get_view/' }),
855Index: docs/request_response.txt
856===================================================================
857--- docs/request_response.txt   (revision 5343)
858+++ docs/request_response.txt   (working copy)
859@@ -72,13 +72,25 @@
860 ``FILES``
861     A dictionary-like object containing all uploaded files. Each key in
862     ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
863-    value in ``FILES`` is a standard Python dictionary with the following three
864+    value in ``FILES`` is a standard Python dictionary with the following four
865     keys:
866 
867         * ``filename`` -- The name of the uploaded file, as a Python string.
868         * ``content-type`` -- The content type of the uploaded file.
869         * ``content`` -- The raw content of the uploaded file.
870+        * ``content-length`` -- The length of the content in bytes.
871 
872+    If streaming file uploads are enabled, two additional keys
873+    describing the uploaded file will be present:
874+
875+        * ``tmpfilename`` -- The filename for the temporary file.
876+        * ``tmpfile`` -- An open file object for the temporary file.
877+
878+    The temporary file will be removed when the request finishes.
879+
880+    Note that accessing ``content`` when streaming uploads are enabled
881+    will read the whole file into memory, which may not be what you want.
882+
883     Note that ``FILES`` will only contain data if the request method was POST
884     and the ``<form>`` that posted to the request had
885     ``enctype="multipart/form-data"``. Otherwise, ``FILES`` will be a blank
886Index: docs/settings.txt
887===================================================================
888--- docs/settings.txt   (revision 5343)
889+++ docs/settings.txt   (working copy)
890@@ -448,6 +448,15 @@
891 
892 .. _Testing Django Applications: ../testing/
893 
894+FILE_UPLOAD_DIR
895+---------------
896+
897+Default: ``None``
898+
899+Path to a directory where temporary files should be written during
900+file uploads. Leaving this as ``None`` will disable streaming file uploads,
901+and cause all uploaded files to be stored (temporarily) in memory.
902+
903 IGNORABLE_404_ENDS
904 ------------------
905 
906@@ -764,6 +773,16 @@
907 
908 .. _site framework docs: ../sites/
909 
910+STREAMING_MIN_POST_SIZE
911+-----------------------
912+
913+Default: 524288 (``512*1024``)
914+
915+An integer specifying the minimum number of bytes that has to be
916+received (in a POST) for file upload streaming to take place. Any
917+request smaller than this will be handled in memory.
918+Note: ``FILE_UPLOAD_DIR`` has to be defined to enable streaming.
919+
920 TEMPLATE_CONTEXT_PROCESSORS
921 ---------------------------
922 
923Index: docs/forms.txt
924===================================================================
925--- docs/forms.txt      (revision 5343)
926+++ docs/forms.txt      (working copy)
927@@ -475,6 +475,19 @@
928    new_data = request.POST.copy()
929    new_data.update(request.FILES)
930 
931+Streaming file uploads
932+----------------------
933+
934+File uploads will be read into memory by default. This works fine for
935+small to medium-sized uploads (from 1MB to 100MB depending on your
936+setup and usage). If you want to support larger uploads, you can enable
937+upload streaming, where only a small part of the file will be in memory
938+at any time. To do this, you need to specify the ``FILE_UPLOAD_DIR``
939+setting (see the settings_ document for more details).
940+
941+See `request object`_ for more details about ``request.FILES`` objects
942+with streaming file uploads enabled.
943+
944 Validators
945 ==========
946 
947@@ -698,3 +711,4 @@
948 .. _`generic views`: ../generic_views/
949 .. _`models API`: ../model-api/
950 .. _settings: ../settings/
951+.. _request object: ../request_response/#httprequest-objects