Code

Ticket #2070: 6603_all_tests_pass_uploadedfile_wrapper_fixed.diff

File 6603_all_tests_pass_uploadedfile_wrapper_fixed.diff, 42.1 KB (added by Øyvind Saltvik <oyvind@…>, 7 years ago)

Fixed a problem with the UploadedFile wrapper and made sure content is not read in File/ImageField

Line 
1Index: django/http/multipartparser.py
2===================================================================
3--- django/http/multipartparser.py      (revision 0)
4+++ django/http/multipartparser.py      (revision 0)
5@@ -0,0 +1,328 @@
6+"""
7+MultiPart parsing for file uploads.
8+If both a progress id is sent (either through ``X-Progress-ID``
9+header or ``progress_id`` GET) and ``FILE_UPLOAD_DIR`` is set
10+in the settings, then the file progress will be tracked using
11+``request.file_progress``.
12+
13+To use this feature, consider creating a middleware with an appropriate
14+``process_request``::
15+
16+    class FileProgressTrack(object):
17+        def __get__(self, request, HttpRequest):
18+            progress_id = request.META['UPLOAD_PROGRESS_ID']
19+            status = # get progress from progress_id here
20+
21+            return status
22+
23+        def __set__(self, request, new_value):
24+            progress_id = request.META['UPLOAD_PROGRESS_ID']
25+
26+            # set the progress using progress_id here.
27+
28+    # example middleware
29+    class FileProgressExample(object):
30+        def process_request(self, request):
31+            request.__class__.file_progress = FileProgressTrack()
32+
33+
34+
35+"""
36+
37+__all__ = ['MultiPartParserError','MultiPartParser']
38+
39+
40+from django.utils.datastructures import MultiValueDict
41+from django.http.utils import str_to_unicode
42+from django.conf import settings
43+import os
44+
45+try:
46+    from cStringIO import StringIO
47+except ImportError:
48+    from StringIO import StringIO
49+
50+
51+class MultiPartParserError(Exception):
52+    def __init__(self, message):
53+        self.message = message
54+    def __str__(self):
55+        return repr(self.message)
56+
57+class MultiPartParser(object):
58+    """
59+    A rfc2388 multipart/form-data parser.
60+   
61+    parse() reads the input stream in chunk_size chunks and returns a
62+    tuple of (POST MultiValueDict, FILES MultiValueDict). If
63+    file_upload_dir is defined files will be streamed to temporary
64+    files in the specified directory.
65+
66+    The FILES dictionary will have 'filename', 'content-type',
67+    'content' and 'content-length' entries. For streamed files it will
68+    also have 'tmpfilename' and 'tmpfile'. The 'content' entry will
69+    only be read from disk when referenced for streamed files.
70+
71+    If the X-Progress-ID is sent (in one of many formats), then
72+    object.file_progress will be given a dictionary of the progress.
73+    """
74+    def __init__(self, headers, input, request, file_upload_dir=None, streaming_min_post_size=None, chunk_size=1024*64):
75+        try:
76+            content_length = int(headers['Content-Length'])
77+        except:
78+            raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length'))
79+
80+        content_type = headers.get('Content-Type')
81+
82+        if not content_type or not content_type.startswith('multipart/'):
83+            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
84+           
85+        ctype, opts = self.parse_header(content_type)
86+        boundary = opts.get('boundary')
87+        from cgi import valid_boundary
88+        if not boundary or not valid_boundary(boundary):
89+            raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary)
90+
91+        progress_id = request.META['UPLOAD_PROGRESS_ID']
92+
93+        self._track_progress = file_upload_dir and progress_id # whether or not to track progress
94+        self._boundary = '--' + boundary
95+        self._input = input
96+        self._size = content_length
97+        self._received = 0
98+        self._file_upload_dir = file_upload_dir
99+        self._chunk_size = chunk_size
100+        self._state = 'PREAMBLE'
101+        self._partial = ''
102+        self._post = MultiValueDict()
103+        self._files = MultiValueDict()
104+        self._request = request
105+        self._encoding = request.encoding or settings.DEFAULT_CHARSET
106+
107+        if streaming_min_post_size is not None and content_length < streaming_min_post_size:
108+            self._file_upload_dir = None # disable file streaming for small request
109+        elif self._track_progress:
110+            request.file_progress = {'state': 'starting'}
111+
112+        try:
113+            # Use mx fast string search if available.
114+            from mx.TextTools import FS
115+            self._fs = FS(self._boundary)
116+        except ImportError:
117+            self._fs = None
118+
119+    def parse(self):
120+        try:
121+            self._parse()
122+        finally:
123+            if self._track_progress:
124+                self._request.file_progress = {'state': 'done'}
125+        return self._post, self._files
126+
127+    def _parse(self):
128+        size = self._size
129+
130+        try:
131+            while size > 0:
132+                n = self._read(self._input, min(self._chunk_size, size))
133+                if not n:
134+                    break
135+                size -= n
136+        except:
137+            # consume any remaining data so we don't generate a "Connection Reset" error
138+            size = self._size - self._received
139+            while size > 0:
140+                data = self._input.read(min(self._chunk_size, size))
141+                size -= len(data)
142+            raise
143+
144+    def _find_boundary(self, data, start, stop):
145+        """
146+        Find the next boundary and return the end of current part
147+        and start of next part.
148+        """
149+        if self._fs:
150+            boundary = self._fs.find(data, start, stop)
151+        else:
152+            boundary = data.find(self._boundary, start, stop)
153+        if boundary >= 0:
154+            end = boundary
155+            next = boundary + len(self._boundary)
156+
157+            # backup over CRLF
158+            if end > 0 and data[end-1] == '\n': end -= 1
159+            if end > 0 and data[end-1] == '\r': end -= 1
160+            # skip over --CRLF
161+            if next < stop and data[next] == '-': next += 1
162+            if next < stop and data[next] == '-': next += 1
163+            if next < stop and data[next] == '\r': next += 1
164+            if next < stop and data[next] == '\n': next += 1
165+
166+            return True, end, next
167+        else:
168+            return False, stop, stop
169+
170+    class TemporaryFile(object):
171+        "A temporary file that tries to delete itself when garbage collected."
172+        def __init__(self, dir):
173+            import tempfile
174+            (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
175+            self.file = os.fdopen(fd, 'w+b')
176+            self.name = name
177+
178+        def __getattr__(self, name):
179+            a = getattr(self.__dict__['file'], name)
180+            if type(a) != type(0):
181+                setattr(self, name, a)
182+            return a
183+
184+        def __del__(self):
185+            try:
186+                os.unlink(self.name)
187+            except OSError:
188+                pass
189+
190+    class LazyContent(dict):
191+        """
192+        A lazy FILES dictionary entry that reads the contents from
193+        tmpfile only when referenced.
194+        """
195+        def __init__(self, data):
196+            dict.__init__(self, data)
197+
198+        def __getitem__(self, key):
199+            if key == 'content' and not self.has_key(key):
200+                self['tmpfile'].seek(0)
201+                self['content'] = self['tmpfile'].read()
202+            return dict.__getitem__(self, key)
203+
204+    def _read(self, input, size):
205+        data = input.read(size)
206+
207+        if not data:
208+            return 0
209+
210+        read_size = len(data)
211+        self._received += read_size
212+
213+        if self._partial:
214+            data = self._partial + data
215+
216+        start = 0
217+        stop = len(data)
218+
219+        while start < stop:
220+            boundary, end, next = self._find_boundary(data, start, stop)
221+
222+            if not boundary and read_size:
223+                # make sure we don't treat a partial boundary (and its separators) as data
224+                stop -= len(self._boundary) + 16
225+                end = next = stop
226+                if end <= start:
227+                    break # need more data
228+
229+            if self._state == 'PREAMBLE':
230+                # Preamble, just ignore it
231+                self._state = 'HEADER'
232+
233+            elif self._state == 'HEADER':
234+                # Beginning of header, look for end of header and parse it if found.
235+
236+                header_end = data.find('\r\n\r\n', start, stop)
237+                if header_end == -1:
238+                    break # need more data
239+
240+                header = data[start:header_end]
241+
242+                self._fieldname = None
243+                self._filename = None
244+                self._content_type = None
245+
246+                for line in header.split('\r\n'):
247+                    ctype, opts = self.parse_header(line)
248+                    if ctype == 'content-disposition: form-data':
249+                        self._fieldname = opts.get('name')
250+                        self._filename = opts.get('filename')
251+                    elif ctype.startswith('content-type: '):
252+                        self._content_type = ctype[14:]
253+
254+                if self._filename is not None:
255+                    # cleanup filename from IE full paths:
256+                    self._filename = self._filename[self._filename.rfind("\\")+1:].strip()
257+
258+                    if self._filename: # ignore files without filenames
259+                        if self._file_upload_dir:
260+                            try:
261+                                self._file = self.TemporaryFile(dir=self._file_upload_dir)
262+                            except (OSError, IOError), e:
263+                                raise MultiPartParserError("Failed to create temporary file. Error was %s" % e)
264+                        else:
265+                            self._file = StringIO()
266+                    else:
267+                        self._file = None
268+                    self._filesize = 0
269+                    self._state = 'FILE'
270+                else:
271+                    self._field = StringIO()
272+                    self._state = 'FIELD'
273+                next = header_end + 4
274+
275+            elif self._state == 'FIELD':
276+                # In a field, collect data until a boundary is found.
277+
278+                self._field.write(data[start:end])
279+                if boundary:
280+                    if self._fieldname:
281+                        self._post.appendlist(self._fieldname, str_to_unicode(self._field.getvalue(), self._encoding))
282+                    self._field.close()
283+                    self._state = 'HEADER'
284+
285+            elif self._state == 'FILE':
286+                # In a file, collect data until a boundary is found.
287+
288+                if self._file:
289+                    try:
290+                        self._file.write(data[start:end])
291+                    except IOError, e:
292+                        raise MultiPartParserError("Failed to write to temporary file.")
293+                    self._filesize += end-start
294+
295+                    if self._track_progress:
296+                        self._request.file_progress = {'received': self._received,
297+                                                       'size':     self._size,
298+                                                       'state':    'uploading'}
299+
300+                if boundary:
301+                    if self._file:
302+                        if self._file_upload_dir:
303+                            self._file.seek(0)
304+                            file = self.LazyContent({
305+                                'filename': str_to_unicode(self._filename, self._encoding),
306+                                'content-type':  self._content_type,
307+                                # 'content': is read on demand
308+                                'content-length': self._filesize,
309+                                'tmpfilename': self._file.name,
310+                                'tmpfile': self._file
311+                            })
312+                        else:
313+                            file = {
314+                                'filename': str_to_unicode(self._filename, self._encoding),
315+                                'content-type':  self._content_type,
316+                                'content': self._file.getvalue(),
317+                                'content-length': self._filesize
318+                            }
319+                            self._file.close()
320+
321+                        self._files.appendlist(self._fieldname, file)
322+
323+                    self._state = 'HEADER'
324+
325+            start = next
326+
327+        self._partial = data[start:]
328+
329+        return read_size
330+
331+    def parse_header(self, line):
332+        from cgi import parse_header
333+        return parse_header(line)
334Index: django/http/__init__.py
335===================================================================
336--- django/http/__init__.py     (revision 6603)
337+++ django/http/__init__.py     (working copy)
338@@ -1,11 +1,16 @@
339 import os
340+import re
341 from Cookie import SimpleCookie
342 from pprint import pformat
343 from urllib import urlencode
344 from urlparse import urljoin
345+from django.http.utils import str_to_unicode
346+from django.http.multipartparser import MultiPartParser, MultiPartParserError
347 from django.utils.datastructures import MultiValueDict, FileDict
348 from django.utils.encoding import smart_str, iri_to_uri, force_unicode
349 
350+upload_id_re = re.compile(r'^[a-fA-F0-9]{32}$') # file progress id Regular expression
351+
352 RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
353 
354 try:
355@@ -78,7 +83,7 @@
356 
357     def is_secure(self):
358         return os.environ.get("HTTPS") == "on"
359-
360+       
361     def _set_encoding(self, val):
362         """
363         Sets the encoding used for GET/POST accesses. If the GET or POST
364@@ -96,39 +101,55 @@
365 
366     encoding = property(_get_encoding, _set_encoding)
367 
368-def parse_file_upload(header_dict, post_data):
369-    "Returns a tuple of (POST QueryDict, FILES MultiValueDict)"
370-    import email, email.Message
371-    from cgi import parse_header
372-    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
373-    raw_message += '\r\n\r\n' + post_data
374-    msg = email.message_from_string(raw_message)
375-    POST = QueryDict('', mutable=True)
376-    FILES = MultiValueDict()
377-    for submessage in msg.get_payload():
378-        if submessage and isinstance(submessage, email.Message.Message):
379-            name_dict = parse_header(submessage['Content-Disposition'])[1]
380-            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
381-            # or {'name': 'blah'} for POST fields
382-            # We assume all uploaded files have a 'filename' set.
383-            if 'filename' in name_dict:
384-                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
385-                if not name_dict['filename'].strip():
386-                    continue
387-                # IE submits the full path, so trim everything but the basename.
388-                # (We can't use os.path.basename because that uses the server's
389-                # directory separator, which may not be the same as the
390-                # client's one.)
391-                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
392-                FILES.appendlist(name_dict['name'], FileDict({
393-                    'filename': filename,
394-                    'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
395-                    'content': submessage.get_payload(),
396-                }))
397-            else:
398-                POST.appendlist(name_dict['name'], submessage.get_payload())
399-    return POST, FILES
400+    def _get_file_progress(self):
401+        return {}
402+   
403+    def _set_file_progress(self,value):
404+        pass
405 
406+    def _del_file_progress(self):
407+        pass
408+
409+    file_progress = property(_get_file_progress,
410+                             _set_file_progress,
411+                             _del_file_progress)
412+
413+    def _get_file_progress_from_args(self, headers, get, querystring):
414+        """
415+        This parses the request for a file progress_id value.
416+        Note that there are two distinct ways of getting the progress
417+        ID -- header and GET. One is used primarily to attach via JavaScript
418+        to the end of an HTML form action while the other is used for AJAX
419+        communication.
420+
421+        All progress IDs must be valid 32-digit hexadecimal numbers.
422+        """
423+        if 'X-Upload-ID' in headers:
424+            progress_id = headers['X-Upload-ID']
425+        elif 'progress_id' in get:
426+            progress_id = get['progress_id']
427+        else:
428+            return None
429+
430+        if not upload_id_re.match(progress_id):
431+            return None
432+
433+        return progress_id
434+
435+def parse_file_upload(headers, input, request):
436+    from django.conf import settings
437+
438+    # Only stream files to disk if FILE_UPLOAD_DIR is set
439+    file_upload_dir = settings.FILE_UPLOAD_DIR
440+    streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE
441+
442+    try:
443+        parser = MultiPartParser(headers, input, request, file_upload_dir, streaming_min_post_size)
444+        return parser.parse()
445+    except MultiPartParserError, e:
446+        return MultiValueDict({ '_file_upload_error': [e.message] }), {}
447+
448+
449 class QueryDict(MultiValueDict):
450     """
451     A specialized MultiValueDict that takes a query string when initialized.
452@@ -399,20 +420,3 @@
453 # A backwards compatible alias for HttpRequest.get_host.
454 def get_host(request):
455     return request.get_host()
456-
457-# It's neither necessary nor appropriate to use
458-# django.utils.encoding.smart_unicode for parsing URLs and form inputs. Thus,
459-# this slightly more restricted function.
460-def str_to_unicode(s, encoding):
461-    """
462-    Convert basestring objects to unicode, using the given encoding. Illegaly
463-    encoded input characters are replaced with Unicode "unknown" codepoint
464-    (\ufffd).
465-
466-    Returns any non-basestring objects without change.
467-    """
468-    if isinstance(s, str):
469-        return unicode(s, encoding, 'replace')
470-    else:
471-        return s
472-
473Index: django/http/utils.py
474===================================================================
475--- django/http/utils.py        (revision 0)
476+++ django/http/utils.py        (revision 0)
477@@ -0,0 +1,16 @@
478+# It's neither necessary nor appropriate to use
479+# django.utils.encoding.smart_unicode for parsing URLs and form inputs. Thus,
480+# this slightly more restricted function.
481+def str_to_unicode(s, encoding):
482+    """
483+    Convert basestring objects to unicode, using the given encoding. Illegally
484+    encoded input characters are replaced with Unicode "unknown" codepoint
485+    (\ufffd).
486+
487+    Returns any non-basestring objects without change.
488+    """
489+    if isinstance(s, str):
490+        return unicode(s, encoding, 'replace')
491+    else:
492+        return s
493+
494Index: django/oldforms/__init__.py
495===================================================================
496--- django/oldforms/__init__.py (revision 6603)
497+++ django/oldforms/__init__.py (working copy)
498@@ -680,17 +680,22 @@
499         self.validator_list = [self.isNonEmptyFile] + validator_list
500 
501     def isNonEmptyFile(self, field_data, all_data):
502-        try:
503-            content = field_data['content']
504-        except TypeError:
505+        if field_data.has_key('_file_upload_error'):
506+            raise validators.CriticalValidationError, field_data['_file_upload_error']
507+        if not field_data.has_key('filename'):
508             raise validators.CriticalValidationError, ugettext("No file was submitted. Check the encoding type on the form.")
509-        if not content:
510+        if not field_data['content-length']:
511             raise validators.CriticalValidationError, ugettext("The submitted file is empty.")
512 
513     def render(self, data):
514         return u'<input type="file" id="%s" class="v%s" name="%s" />' % \
515             (self.get_id(), self.__class__.__name__, self.field_name)
516 
517+    def prepare(self, new_data):
518+        if new_data.has_key('_file_upload_error'):
519+            # pretend we got something in the field to raise a validation error later
520+            new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] }
521+
522     def html2python(data):
523         if data is None:
524             raise EmptyValue
525Index: django/db/models/base.py
526===================================================================
527--- django/db/models/base.py    (revision 6603)
528+++ django/db/models/base.py    (working copy)
529@@ -12,6 +12,7 @@
530 from django.dispatch import dispatcher
531 from django.utils.datastructures import SortedDict
532 from django.utils.functional import curry
533+from django.utils.file import file_move_safe
534 from django.utils.encoding import smart_str, force_unicode, smart_unicode
535 from django.conf import settings
536 from itertools import izip
537@@ -379,12 +380,16 @@
538     def _get_FIELD_size(self, field):
539         return os.path.getsize(self._get_FIELD_filename(field))
540 
541-    def _save_FIELD_file(self, field, filename, raw_contents, save=True):
542+    def _save_FIELD_file(self, field, filename, raw_field, save=True):
543         directory = field.get_directory_name()
544         try: # Create the date-based directory if it doesn't exist.
545             os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
546         except OSError: # Directory probably already exists.
547             pass
548+
549+        if filename is None:
550+            filename = raw_field['filename']
551+
552         filename = field.get_filename(filename)
553 
554         # If the filename already exists, keep adding an underscore to the name of
555@@ -401,9 +406,16 @@
556         setattr(self, field.attname, filename)
557 
558         full_filename = self._get_FIELD_filename(field)
559-        fp = open(full_filename, 'wb')
560-        fp.write(raw_contents)
561-        fp.close()
562+        if raw_field.has_key('tmpfilename'):
563+            raw_field['tmpfile'].close()
564+            file_move_safe(raw_field['tmpfilename'], full_filename)
565+        else:
566+            from django.utils import file_locks
567+            fp = open(full_filename, 'wb')
568+            # exclusive lock
569+            file_locks.lock(fp, file_locks.LOCK_EX)
570+            fp.write(raw_field['content'])
571+            fp.close()
572 
573         # Save the width and/or height, if applicable.
574         if isinstance(field, ImageField) and (field.width_field or field.height_field):
575Index: django/db/models/fields/__init__.py
576===================================================================
577--- django/db/models/fields/__init__.py (revision 6603)
578+++ django/db/models/fields/__init__.py (working copy)
579@@ -759,7 +759,8 @@
580         setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
581         setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
582         setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
583-        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
584+        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
585+        setattr(cls, 'move_%s_file' % self.name, lambda instance, raw_field, save=True: instance._save_FIELD_file(self, None, raw_field, save))       
586         dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
587 
588     def delete_file(self, instance):
589@@ -782,9 +783,9 @@
590         if new_data.get(upload_field_name, False):
591             func = getattr(new_object, 'save_%s_file' % self.name)
592             if rel:
593-                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
594+                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save)
595             else:
596-                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
597+                func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save)
598 
599     def get_directory_name(self):
600         return os.path.normpath(force_unicode(datetime.datetime.now().strftime(smart_str(self.upload_to))))
601@@ -796,7 +797,7 @@
602 
603     def save_form_data(self, instance, data):
604         if data:
605-            getattr(instance, "save_%s_file" % self.name)(data.filename, data.content, save=False)
606+            getattr(instance, "save_%s_file" % self.name)(data.filename, data.data, save=False)
607 
608     def formfield(self, **kwargs):
609         defaults = {'form_class': forms.FileField}
610Index: django/conf/global_settings.py
611===================================================================
612--- django/conf/global_settings.py      (revision 6603)
613+++ django/conf/global_settings.py      (working copy)
614@@ -251,6 +251,16 @@
615 from django import get_version
616 URL_VALIDATOR_USER_AGENT = "Django/%s (http://www.djangoproject.com)" % get_version()
617 
618+# The directory to place streamed file uploads. The web server needs write
619+# permissions on this directory.
620+# If this is None, streaming uploads are disabled.
621+FILE_UPLOAD_DIR = None
622+
623+# The minimum size of a POST before file uploads are streamed to disk.
624+# Any less than this number, and the file is uploaded to memory.
625+# Size is in bytes.
626+STREAMING_MIN_POST_SIZE = 512 * (2**10)
627+
628 ##############
629 # MIDDLEWARE #
630 ##############
631Index: django/core/handlers/wsgi.py
632===================================================================
633--- django/core/handlers/wsgi.py        (revision 6603)
634+++ django/core/handlers/wsgi.py        (working copy)
635@@ -77,6 +77,7 @@
636         self.environ = environ
637         self.path = force_unicode(environ['PATH_INFO'])
638         self.META = environ
639+        self.META['UPLOAD_PROGRESS_ID'] = self._get_file_progress_id()
640         self.method = environ['REQUEST_METHOD'].upper()
641 
642     def __repr__(self):
643@@ -114,7 +115,14 @@
644             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
645                 header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
646                 header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
647-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
648+                header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '')
649+                header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '')
650+                try:
651+                    self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'], self)
652+                except:
653+                    self._post, self._files = {}, {} # make sure we dont read the input stream again
654+                    raise
655+                self._raw_post_data = None # raw data is not available for streamed multipart messages
656             else:
657                 self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
658         else:
659@@ -172,6 +180,17 @@
660             buf.close()
661             return self._raw_post_data
662 
663+    def _get_file_progress_id(self):
664+        """
665+        Returns the Progress ID of the request,
666+        usually provided if there is a file upload
667+        going on.
668+        Returns ``None`` if no progress ID is specified.
669+        """
670+        return self._get_file_progress_from_args(self.environ,
671+                                                 self.GET,
672+                                                 self.environ.get('QUERY_STRING', ''))
673+
674     GET = property(_get_get, _set_get)
675     POST = property(_get_post, _set_post)
676     COOKIES = property(_get_cookies, _set_cookies)
677Index: django/core/handlers/modpython.py
678===================================================================
679--- django/core/handlers/modpython.py   (revision 6603)
680+++ django/core/handlers/modpython.py   (working copy)
681@@ -51,7 +51,12 @@
682     def _load_post_and_files(self):
683         "Populates self._post and self._files"
684         if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
685-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
686+            self._raw_post_data = None # raw data is not available for streamed multipart messages
687+            try:
688+                self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req, self)
689+            except:
690+                self._post, self._files = {}, {} # make sure we dont read the input stream again
691+                raise
692         else:
693             self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
694 
695@@ -96,20 +101,21 @@
696                 'AUTH_TYPE':         self._req.ap_auth_type,
697                 'CONTENT_LENGTH':    self._req.clength, # This may be wrong
698                 'CONTENT_TYPE':      self._req.content_type, # This may be wrong
699-                'GATEWAY_INTERFACE': 'CGI/1.1',
700-                'PATH_INFO':         self._req.path_info,
701-                'PATH_TRANSLATED':   None, # Not supported
702-                'QUERY_STRING':      self._req.args,
703-                'REMOTE_ADDR':       self._req.connection.remote_ip,
704-                'REMOTE_HOST':       None, # DNS lookups not supported
705-                'REMOTE_IDENT':      self._req.connection.remote_logname,
706-                'REMOTE_USER':       self._req.user,
707-                'REQUEST_METHOD':    self._req.method,
708-                'SCRIPT_NAME':       None, # Not supported
709-                'SERVER_NAME':       self._req.server.server_hostname,
710-                'SERVER_PORT':       self._req.server.port,
711-                'SERVER_PROTOCOL':   self._req.protocol,
712-                'SERVER_SOFTWARE':   'mod_python'
713+                'GATEWAY_INTERFACE':  'CGI/1.1',
714+                'PATH_INFO':          self._req.path_info,
715+                'PATH_TRANSLATED':    None, # Not supported
716+                'QUERY_STRING':       self._req.args,
717+                'REMOTE_ADDR':        self._req.connection.remote_ip,
718+                'REMOTE_HOST':        None, # DNS lookups not supported
719+                'REMOTE_IDENT':       self._req.connection.remote_logname,
720+                'REMOTE_USER':        self._req.user,
721+                'REQUEST_METHOD':     self._req.method,
722+                'SCRIPT_NAME':        None, # Not supported
723+                'SERVER_NAME':        self._req.server.server_hostname,
724+                'SERVER_PORT':        self._req.server.port,
725+                'SERVER_PROTOCOL':    self._req.protocol,
726+                'UPLOAD_PROGRESS_ID': self._get_file_progress_id(),
727+                'SERVER_SOFTWARE':    'mod_python'
728             }
729             for key, value in self._req.headers_in.items():
730                 key = 'HTTP_' + key.upper().replace('-', '_')
731@@ -126,6 +132,17 @@
732     def _get_method(self):
733         return self.META['REQUEST_METHOD'].upper()
734 
735+    def _get_file_progress_id(self):
736+        """
737+        Returns the Progress ID of the request,
738+        usually provided if there is a file upload
739+        going on.
740+        Returns ``None`` if no progress ID is specified.
741+        """
742+        return self._get_file_progress_from_args(self._req.headers_in,
743+                                                 self.GET,
744+                                                 self._req.args)
745+
746     GET = property(_get_get, _set_get)
747     POST = property(_get_post, _set_post)
748     COOKIES = property(_get_cookies, _set_cookies)
749Index: django/newforms/fields.py
750===================================================================
751--- django/newforms/fields.py   (revision 6603)
752+++ django/newforms/fields.py   (working copy)
753@@ -359,9 +359,9 @@
754 
755 class UploadedFile(StrAndUnicode):
756     "A wrapper for files uploaded in a FileField"
757-    def __init__(self, filename, content):
758+    def __init__(self, filename, data):
759         self.filename = filename
760-        self.content = content
761+        self.data = data
762 
763     def __unicode__(self):
764         """
765@@ -380,12 +380,12 @@
766         if not self.required and data in EMPTY_VALUES:
767             return None
768         try:
769-            f = UploadedFile(data['filename'], data['content'])
770+            f = UploadedFile(data['filename'], data)
771         except TypeError:
772             raise ValidationError(ugettext(u"No file was submitted. Check the encoding type on the form."))
773         except KeyError:
774             raise ValidationError(ugettext(u"No file was submitted."))
775-        if not f.content:
776+        if not f.data['content-length']:
777             raise ValidationError(ugettext(u"The submitted file is empty."))
778         return f
779 
780@@ -403,11 +403,11 @@
781         try:
782             # load() is the only method that can spot a truncated JPEG,
783             #  but it cannot be called sanely after verify()
784-            trial_image = Image.open(StringIO(f.content))
785+            trial_image = Image.open(f.data.get('tmpfilename') or StringIO(f.data['content']))
786             trial_image.load()
787             # verify() is the only method that can spot a corrupt PNG,
788             #  but it must be called immediately after the constructor
789-            trial_image = Image.open(StringIO(f.content))
790+            trial_image = Image.open(f.data.get('tmpfilename') or StringIO(f.data['content']))
791             trial_image.verify()
792         except Exception: # Python Imaging Library doesn't recognize it as an image
793             raise ValidationError(ugettext(u"Upload a valid image. The file you uploaded was either not an image or a corrupted image."))
794Index: django/utils/file_locks.py
795===================================================================
796--- django/utils/file_locks.py  (revision 0)
797+++ django/utils/file_locks.py  (revision 0)
798@@ -0,0 +1,50 @@
799+"""
800+Portable file locking by Jonathan Feinberg <jdf@pobox.com>, from the Python Cookbook
801+
802+Example Usage::
803+
804+    from django.utils import file_locks
805+
806+    f = open('./file', 'wb')
807+
808+    file_locks.lock(f, file_locks.LOCK_EX)
809+    f.write('Django')
810+    f.close()
811+"""
812+
813+
814+import os
815+
816+__all__ = ['LOCK_EX','LOCK_SH','LOCK_NB','lock','unlock']
817+
818+if os.name == 'nt':
819+       import win32con
820+       import win32file
821+       import pywintypes
822+       LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
823+       LOCK_SH = 0
824+       LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
825+       __overlapped = pywintypes.OVERLAPPED()
826+elif os.name == 'posix':
827+       import fcntl
828+       LOCK_EX = fcntl.LOCK_EX
829+       LOCK_SH = fcntl.LOCK_SH
830+       LOCK_NB = fcntl.LOCK_NB
831+else:
832+       raise RuntimeError("Locking only defined for nt and posix platforms")
833+
834+if os.name == 'nt':
835+       def lock(file, flags):
836+               hfile = win32file._get_osfhandle(file.fileno())
837+               win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped)
838+
839+       def unlock(file):
840+               hfile = win32file._get_osfhandle(file.fileno())
841+               win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped)
842+
843+elif os.name =='posix':
844+       def lock(file, flags):
845+               fcntl.flock(file.fileno(), flags)
846+
847+       def unlock(file):
848+               fcntl.flock(file.fileno(), fcntl.LOCK_UN)
849Index: django/utils/file.py
850===================================================================
851--- django/utils/file.py        (revision 0)
852+++ django/utils/file.py        (revision 0)
853@@ -0,0 +1,53 @@
854+import os
855+
856+__all__ = ['file_move_safe']
857+
858+try:
859+    import shutil
860+    file_move = shutil.move
861+except ImportError:
862+    file_move = os.rename
863+
864+def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64, allow_overwrite=False):
865+    """
866+    Moves a file from one location to another in the safest way possible.
867+
868+    First, it tries shutil.move (or os.rename if shutil is unavailable).
869+    shutil.move is OS-dependent but doesn't break with a change of
870+    filesystems, whereas os.rename does. If that attempt fails with an
871+    OSError, the file is streamed manually from one file to another in Python.
872+
873+    Without ``allow_overwrite``, an IOError is raised if the destination
874+    file already exists.
875+    """
876+
877+    from django.utils import file_locks
878+
879+    if old_file_name == new_file_name:
880+        # No file moving takes place.
881+        return
882+
883+    if not allow_overwrite and os.path.exists(new_file_name):
884+        raise IOError, "Django does not allow overwriting files."
885+
886+    try:
887+        file_move(old_file_name, new_file_name)
888+        return
889+    except OSError: # moving to another filesystem
890+        pass
891+
892+    new_file = open(new_file_name, 'wb')
893+    # exclusive lock
894+    file_locks.lock(new_file, file_locks.LOCK_EX)
895+    old_file = open(old_file_name, 'rb')
896+    current_chunk = None
897+
898+    while current_chunk != '':
899+        current_chunk = old_file.read(chunk_size)
900+        new_file.write(current_chunk)
901+
902+    new_file.close()
903+    old_file.close()
904+
905+    os.remove(old_file_name)
906+
907Index: tests/modeltests/test_client/views.py
908===================================================================
909--- tests/modeltests/test_client/views.py       (revision 6603)
910+++ tests/modeltests/test_client/views.py       (working copy)
911@@ -47,6 +47,12 @@
912 
913     return HttpResponse(t.render(c))
914 
915+def post_file_view(request):
916+    "A view that expects a multipart post and returns a file in the context"
917+    t = Template('File {{ file.filename }} received', name='POST Template')
918+    c = Context({'file': request.FILES['file_file']})
919+    return HttpResponse(t.render(c))
920+
921 def redirect_view(request):
922     "A view that redirects all requests to the GET view"
923     if request.GET:
924Index: tests/modeltests/test_client/models.py
925===================================================================
926--- tests/modeltests/test_client/models.py      (revision 6603)
927+++ tests/modeltests/test_client/models.py      (working copy)
928@@ -4,7 +4,7 @@
929 
930 The test client is a class that can act like a simple
931 browser for testing purposes.
932
933+
934 It allows the user to compose GET and POST requests, and
935 obtain the response that the server gave to those requests.
936 The server Response objects are annotated with the details
937@@ -80,6 +80,21 @@
938         self.assertEqual(response.template.name, "Book template")
939         self.assertEqual(response.content, "Blink - Malcolm Gladwell")
940 
941+    def test_post_file_view(self):
942+        "POST this python file to a view"
943+        import os, tempfile
944+        from django.conf import settings
945+        file = __file__.replace('.pyc', '.py')
946+        for upload_dir, streaming_size in [(None,512*1000), (tempfile.gettempdir(), 1)]:
947+            settings.FILE_UPLOAD_DIR = upload_dir
948+            settings.STREAMING_MIN_POST_SIZE = streaming_size
949+            post_data = { 'name': file, 'file_file': open(file) }
950+            response = self.client.post('/test_client/post_file_view/', post_data)
951+            self.failUnless('models.py' in response.context['file']['filename'])
952+            self.failUnless(len(response.context['file']['content']) == os.path.getsize(file))
953+            if upload_dir:
954+                self.failUnless(response.context['file']['tmpfilename'])
955+
956     def test_redirect(self):
957         "GET a URL that redirects elsewhere"
958         response = self.client.get('/test_client/redirect_view/')
959Index: tests/modeltests/test_client/urls.py
960===================================================================
961--- tests/modeltests/test_client/urls.py        (revision 6603)
962+++ tests/modeltests/test_client/urls.py        (working copy)
963@@ -5,6 +5,7 @@
964 urlpatterns = patterns('',
965     (r'^get_view/$', views.get_view),
966     (r'^post_view/$', views.post_view),
967+    (r'^post_file_view/$', views.post_file_view),
968     (r'^raw_post_view/$', views.raw_post_view),
969     (r'^redirect_view/$', views.redirect_view),
970     (r'^permanent_redirect_view/$', redirect_to, { 'url': '/test_client/get_view/' }),
971Index: docs/request_response.txt
972===================================================================
973--- docs/request_response.txt   (revision 6603)
974+++ docs/request_response.txt   (working copy)
975@@ -82,13 +82,25 @@
976 ``FILES``
977     A dictionary-like object containing all uploaded files. Each key in
978     ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
979-    value in ``FILES`` is a standard Python dictionary with the following three
980+    value in ``FILES`` is a standard Python dictionary with the following four
981     keys:
982 
983         * ``filename`` -- The name of the uploaded file, as a Python string.
984         * ``content-type`` -- The content type of the uploaded file.
985         * ``content`` -- The raw content of the uploaded file.
986+        * ``content-length`` -- The length of the content in bytes.
987 
988+    If streaming file uploads are enabled, two additional keys
989+    describing the uploaded file will be present:
990+
991+       * ``tmpfilename`` -- The filename for the temporary file.
992+       * ``tmpfile`` -- An open file object for the temporary file.
993+
994+    The temporary file will be removed when the request finishes.
995+
996+    Note that accessing ``content`` when streaming uploads are enabled
997+    will read the whole file into memory, which may not be what you want.
998+
999     Note that ``FILES`` will only contain data if the request method was POST
1000     and the ``<form>`` that posted to the request had
1001     ``enctype="multipart/form-data"``. Otherwise, ``FILES`` will be a blank
1002Index: docs/settings.txt
1003===================================================================
1004--- docs/settings.txt   (revision 6603)
1005+++ docs/settings.txt   (working copy)
1006@@ -480,6 +480,15 @@
1007 
1008 .. _Testing Django Applications: ../testing/
1009 
1010+FILE_UPLOAD_DIR
1011+---------------
1012+
1013+Default: ``None``
1014+
1015+Path to a directory where temporary files should be written during
1016+file uploads. Leaving this as ``None`` will disable streaming file uploads,
1017+and cause all uploaded files to be stored (temporarily) in memory.
1018+
1019 IGNORABLE_404_ENDS
1020 ------------------
1021 
1022@@ -845,6 +854,16 @@
1023 
1024 .. _site framework docs: ../sites/
1025 
1026+STREAMING_MIN_POST_SIZE
1027+-----------------------
1028+
1029+Default: 524288 (``512*1024``)
1030+
1031+An integer specifying the minimum number of bytes that has to be
1032+received (in a POST) for file upload streaming to take place. Any
1033+request smaller than this will be handled in memory.
1034+Note: ``FILE_UPLOAD_DIR`` has to be defined to enable streaming.
1035+
1036 TEMPLATE_CONTEXT_PROCESSORS
1037 ---------------------------
1038 
1039Index: docs/forms.txt
1040===================================================================
1041--- docs/forms.txt      (revision 6603)
1042+++ docs/forms.txt      (working copy)
1043@@ -475,6 +475,19 @@
1044    new_data = request.POST.copy()
1045    new_data.update(request.FILES)
1046 
1047+Streaming file uploads
1048+----------------------
1049+
1050+File uploads will be read into memory by default. This works fine for
1051+small to medium-sized uploads (from 1MB to 100MB depending on your
1052+setup and usage). If you want to support larger uploads, you can enable
1053+upload streaming, where only a small part of the file will be in memory
1054+at any time. To do this, you need to specify the ``FILE_UPLOAD_DIR``
1055+setting (see the settings_ document for more details).
1056+
1057+See `request object`_ for more details about ``request.FILES`` objects
1058+with streaming file uploads enabled.
1059+
1060 Validators
1061 ==========
1062 
1063@@ -698,3 +711,4 @@
1064 .. _`generic views`: ../generic_views/
1065 .. _`models API`: ../model-api/
1066 .. _settings: ../settings/
1067+.. _request object: ../request_response/#httprequest-objects