Code

Ticket #2070: 6469_streaming_file_upload.diff

File 6469_streaming_file_upload.diff, 38.5 KB (added by Faheem Mitha <faheem@…>, 7 years ago)
Line 
1diff -r 4ce69c5b8f1d -r 1c51845bdd22 django/conf/global_settings.py
2--- a/django/conf/global_settings.py    Tue Oct 09 21:20:32 2007 -0500
3+++ b/django/conf/global_settings.py    Wed Oct 10 20:00:46 2007 +0000
4@@ -251,6 +251,16 @@ from django import get_version
5 from django import get_version
6 URL_VALIDATOR_USER_AGENT = "Django/%s (http://www.djangoproject.com)" % get_version()
7 
8+# The directory to place streamed file uploads. The web server needs write
9+# permissions on this directory.
10+# If this is None, streaming uploads are disabled.
11+FILE_UPLOAD_DIR = None
12+
13+# The minimum size of a POST before file uploads are streamed to disk.
14+# POSTs smaller than this are held in memory instead.
15+# Size is in bytes.
16+STREAMING_MIN_POST_SIZE = 512 * (2**10)
17+
18 ##############
19 # MIDDLEWARE #
20 ##############
21diff -r 4ce69c5b8f1d -r 1c51845bdd22 django/core/handlers/modpython.py
22--- a/django/core/handlers/modpython.py Tue Oct 09 21:20:32 2007 -0500
23+++ b/django/core/handlers/modpython.py Wed Oct 10 20:00:46 2007 +0000
24@@ -51,7 +51,12 @@ class ModPythonRequest(http.HttpRequest)
25     def _load_post_and_files(self):
26         "Populates self._post and self._files"
27         if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
28-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
29+            self._raw_post_data = None # raw data is not available for streamed multipart messages
30+            try:
31+                self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req, self)
32+            except:
33+                self._post, self._files = {}, {} # make sure we dont read the input stream again
34+                raise
35         else:
36             self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
37 
38@@ -96,20 +101,21 @@ class ModPythonRequest(http.HttpRequest)
39                 'AUTH_TYPE':         self._req.ap_auth_type,
40                 'CONTENT_LENGTH':    self._req.clength, # This may be wrong
41                 'CONTENT_TYPE':      self._req.content_type, # This may be wrong
42-                'GATEWAY_INTERFACE': 'CGI/1.1',
43-                'PATH_INFO':         self._req.path_info,
44-                'PATH_TRANSLATED':   None, # Not supported
45-                'QUERY_STRING':      self._req.args,
46-                'REMOTE_ADDR':       self._req.connection.remote_ip,
47-                'REMOTE_HOST':       None, # DNS lookups not supported
48-                'REMOTE_IDENT':      self._req.connection.remote_logname,
49-                'REMOTE_USER':       self._req.user,
50-                'REQUEST_METHOD':    self._req.method,
51-                'SCRIPT_NAME':       None, # Not supported
52-                'SERVER_NAME':       self._req.server.server_hostname,
53-                'SERVER_PORT':       self._req.server.port,
54-                'SERVER_PROTOCOL':   self._req.protocol,
55-                'SERVER_SOFTWARE':   'mod_python'
56+                'GATEWAY_INTERFACE':  'CGI/1.1',
57+                'PATH_INFO':          self._req.path_info,
58+                'PATH_TRANSLATED':    None, # Not supported
59+                'QUERY_STRING':       self._req.args,
60+                'REMOTE_ADDR':        self._req.connection.remote_ip,
61+                'REMOTE_HOST':        None, # DNS lookups not supported
62+                'REMOTE_IDENT':       self._req.connection.remote_logname,
63+                'REMOTE_USER':        self._req.user,
64+                'REQUEST_METHOD':     self._req.method,
65+                'SCRIPT_NAME':        None, # Not supported
66+                'SERVER_NAME':        self._req.server.server_hostname,
67+                'SERVER_PORT':        self._req.server.port,
68+                'SERVER_PROTOCOL':    self._req.protocol,
69+                'UPLOAD_PROGRESS_ID': self._get_file_progress_id(),
70+                'SERVER_SOFTWARE':    'mod_python'
71             }
72             for key, value in self._req.headers_in.items():
73                 key = 'HTTP_' + key.upper().replace('-', '_')
74@@ -125,6 +131,17 @@ class ModPythonRequest(http.HttpRequest)
75 
76     def _get_method(self):
77         return self.META['REQUEST_METHOD'].upper()
78+
79+    def _get_file_progress_id(self):
80+        """
81+        Returns the Progress ID of the request,
82+        usually provided if there is a file upload
83+        going on.
84+        Returns ``None`` if no progress ID is specified.
85+        """
86+        return self._get_file_progress_from_args(self._req.headers_in,
87+                                                 self.GET,
88+                                                 self._req.args)
89 
90     GET = property(_get_get, _set_get)
91     POST = property(_get_post, _set_post)
92diff -r 4ce69c5b8f1d -r 1c51845bdd22 django/core/handlers/wsgi.py
93--- a/django/core/handlers/wsgi.py      Tue Oct 09 21:20:32 2007 -0500
94+++ b/django/core/handlers/wsgi.py      Wed Oct 10 20:00:46 2007 +0000
95@@ -77,6 +77,7 @@ class WSGIRequest(http.HttpRequest):
96         self.environ = environ
97         self.path = force_unicode(environ['PATH_INFO'])
98         self.META = environ
99+        self.META['UPLOAD_PROGRESS_ID'] = self._get_file_progress_id()
100         self.method = environ['REQUEST_METHOD'].upper()
101 
102     def __repr__(self):
103@@ -114,7 +115,14 @@ class WSGIRequest(http.HttpRequest):
104             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
105                 header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
106                 header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
107-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
108+                header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '')
109+                header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '')
110+                try:
111+                    self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'], self)
112+                except:
113+                    self._post, self._files = {}, {} # make sure we dont read the input stream again
114+                    raise
115+                self._raw_post_data = None # raw data is not available for streamed multipart messages
116             else:
117                 self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
118         else:
119@@ -170,6 +178,17 @@ class WSGIRequest(http.HttpRequest):
120             buf.close()
121             return self._raw_post_data
122 
123+    def _get_file_progress_id(self):
124+        """
125+        Returns the Progress ID of the request,
126+        usually provided if there is a file upload
127+        going on.
128+        Returns ``None`` if no progress ID is specified.
129+        """
130+        return self._get_file_progress_from_args(self.environ,
131+                                                 self.GET,
132+                                                 self.environ.get('QUERY_STRING', ''))
133+
134     GET = property(_get_get, _set_get)
135     POST = property(_get_post, _set_post)
136     COOKIES = property(_get_cookies, _set_cookies)
137diff -r 4ce69c5b8f1d -r 1c51845bdd22 django/db/models/base.py
138--- a/django/db/models/base.py  Tue Oct 09 21:20:32 2007 -0500
139+++ b/django/db/models/base.py  Wed Oct 10 20:00:46 2007 +0000
140@@ -12,6 +12,7 @@ from django.dispatch import dispatcher
141 from django.dispatch import dispatcher
142 from django.utils.datastructures import SortedDict
143 from django.utils.functional import curry
144+from django.utils.file import file_move_safe
145 from django.utils.encoding import smart_str, force_unicode, smart_unicode
146 from django.conf import settings
147 from itertools import izip
148@@ -379,12 +380,16 @@ class Model(object):
149     def _get_FIELD_size(self, field):
150         return os.path.getsize(self._get_FIELD_filename(field))
151 
152-    def _save_FIELD_file(self, field, filename, raw_contents, save=True):
153+    def _save_FIELD_file(self, field, filename, raw_field, save=True):
154         directory = field.get_directory_name()
155         try: # Create the date-based directory if it doesn't exist.
156             os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
157         except OSError: # Directory probably already exists.
158             pass
159+
160+        if filename is None:
161+            filename = raw_field['filename']
162+
163         filename = field.get_filename(filename)
164 
165         # If the filename already exists, keep adding an underscore to the name of
166@@ -401,9 +406,16 @@ class Model(object):
167         setattr(self, field.attname, filename)
168 
169         full_filename = self._get_FIELD_filename(field)
170-        fp = open(full_filename, 'wb')
171-        fp.write(raw_contents)
172-        fp.close()
173+        if raw_field.has_key('tmpfilename'):
174+            raw_field['tmpfile'].close()
175+            file_move_safe(raw_field['tmpfilename'], full_filename)
176+        else:
177+            from django.utils import file_locks
178+            fp = open(full_filename, 'wb')
179+            # exclusive lock
180+            file_locks.lock(fp, file_locks.LOCK_EX)
181+            fp.write(raw_field['content'])
182+            fp.close()
183 
184         # Save the width and/or height, if applicable.
185         if isinstance(field, ImageField) and (field.width_field or field.height_field):
186diff -r 4ce69c5b8f1d -r 1c51845bdd22 django/db/models/fields/__init__.py
187--- a/django/db/models/fields/__init__.py       Tue Oct 09 21:20:32 2007 -0500
188+++ b/django/db/models/fields/__init__.py       Wed Oct 10 20:00:46 2007 +0000
189@@ -757,7 +757,8 @@ class FileField(Field):
190         setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
191         setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
192         setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
193-        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
194+        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
195+        setattr(cls, 'move_%s_file' % self.name, lambda instance, raw_field, save=True: instance._save_FIELD_file(self, None, raw_field, save))       
196         dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
197 
198     def delete_file(self, instance):
199@@ -780,9 +781,9 @@ class FileField(Field):
200         if new_data.get(upload_field_name, False):
201             func = getattr(new_object, 'save_%s_file' % self.name)
202             if rel:
203-                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
204+                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save)
205             else:
206-                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
207+                func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save)
208 
209     def get_directory_name(self):
210         return os.path.normpath(force_unicode(datetime.datetime.now().strftime(smart_str(self.upload_to))))
211diff -r 4ce69c5b8f1d -r 1c51845bdd22 django/http/__init__.py
212--- a/django/http/__init__.py   Tue Oct 09 21:20:32 2007 -0500
213+++ b/django/http/__init__.py   Wed Oct 10 20:00:46 2007 +0000
214@@ -1,10 +1,14 @@ import os
215 import os
216+import re
217 from Cookie import SimpleCookie
218 from pprint import pformat
219 from urllib import urlencode
220 from urlparse import urljoin
221+from django.http.multipartparser import MultiPartParser, MultiPartParserError
222 from django.utils.datastructures import MultiValueDict, FileDict
223 from django.utils.encoding import smart_str, iri_to_uri, force_unicode
224+
225+upload_id_re = re.compile(r'^[a-fA-F0-9]{32}$') # file progress id Regular expression
226 
227 RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
228 
229@@ -78,7 +82,7 @@ class HttpRequest(object):
230 
231     def is_secure(self):
232         return os.environ.get("HTTPS") == "on"
233-
234+       
235     def _set_encoding(self, val):
236         """
237         Sets the encoding used for GET/POST accesses. If the GET or POST
238@@ -96,38 +100,54 @@ class HttpRequest(object):
239 
240     encoding = property(_get_encoding, _set_encoding)
241 
242-def parse_file_upload(header_dict, post_data):
243-    "Returns a tuple of (POST QueryDict, FILES MultiValueDict)"
244-    import email, email.Message
245-    from cgi import parse_header
246-    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
247-    raw_message += '\r\n\r\n' + post_data
248-    msg = email.message_from_string(raw_message)
249-    POST = QueryDict('', mutable=True)
250-    FILES = MultiValueDict()
251-    for submessage in msg.get_payload():
252-        if submessage and isinstance(submessage, email.Message.Message):
253-            name_dict = parse_header(submessage['Content-Disposition'])[1]
254-            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
255-            # or {'name': 'blah'} for POST fields
256-            # We assume all uploaded files have a 'filename' set.
257-            if 'filename' in name_dict:
258-                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
259-                if not name_dict['filename'].strip():
260-                    continue
261-                # IE submits the full path, so trim everything but the basename.
262-                # (We can't use os.path.basename because that uses the server's
263-                # directory separator, which may not be the same as the
264-                # client's one.)
265-                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
266-                FILES.appendlist(name_dict['name'], FileDict({
267-                    'filename': filename,
268-                    'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
269-                    'content': submessage.get_payload(),
270-                }))
271-            else:
272-                POST.appendlist(name_dict['name'], submessage.get_payload())
273-    return POST, FILES
274+    def _get_file_progress(self):
275+        return {}
276+   
277+    def _set_file_progress(self,value):
278+        pass
279+
280+    def _del_file_progress(self):
281+        pass
282+
283+    file_progress = property(_get_file_progress,
284+                             _set_file_progress,
285+                             _del_file_progress)
286+
287+    def _get_file_progress_from_args(self, headers, get, querystring):
288+        """
289+        This parses the request for a file progress_id value.
290+        Note that there are two distinct ways of supplying the progress
291+        ID -- header and GET. The GET parameter is typically appended via
292+        JavaScript to an HTML form action, while the header is used for AJAX
293+        communication.
294+
295+        All progress IDs must be valid 32-digit hexadecimal numbers.
296+        """
297+        if 'X-Upload-ID' in headers:
298+            progress_id = headers['X-Upload-ID']
299+        elif 'progress_id' in get:
300+            progress_id = get['progress_id']
301+        else:
302+            return None
303+
304+        if not upload_id_re.match(progress_id):
305+            return None
306+
307+        return progress_id
308+
309+def parse_file_upload(headers, input, request):
310+    from django.conf import settings
311+
312+    # Only stream files to disk if FILE_UPLOAD_DIR is set
313+    file_upload_dir = settings.FILE_UPLOAD_DIR
314+    streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE
315+
316+    try:
317+        parser = MultiPartParser(headers, input, request, file_upload_dir, streaming_min_post_size)
318+        return parser.parse()
319+    except MultiPartParserError, e:
320+        return MultiValueDict({ '_file_upload_error': [e.message] }), {}
321+
322 
323 class QueryDict(MultiValueDict):
324     """
325diff -r 4ce69c5b8f1d -r 1c51845bdd22 django/http/multipartparser.py
326--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
327+++ b/django/http/multipartparser.py    Wed Oct 10 20:00:46 2007 +0000
328@@ -0,0 +1,325 @@
329+"""
330+MultiPart parsing for file uploads.
331+If a progress ID is sent (either through the ``X-Progress-ID``
332+header or the ``progress_id`` GET parameter) and ``FILE_UPLOAD_DIR``
333+is set in the settings, then the file progress will be tracked using
334+``request.file_progress``.
335+
336+To use this feature, consider creating a middleware with an appropriate
337+``process_request``::
338+
339+    class FileProgressTrack(object):
340+        def __get__(self, request, HttpRequest):
341+            progress_id = request.META['UPLOAD_PROGRESS_ID']
342+            status = # get progress from progress_id here
343+
344+            return status
345+
346+        def __set__(self, request, new_value):
347+            progress_id = request.META['UPLOAD_PROGRESS_ID']
348+
349+            # set the progress using progress_id here.
350+
351+    # example middleware
352+    class FileProgressExample(object):
353+        def process_request(self, request):
354+            request.__class__.file_progress = FileProgressTrack()
355+
356+
357+
358+"""
359+
360+__all__ = ['MultiPartParserError','MultiPartParser']
361+
362+
363+from django.utils.datastructures import MultiValueDict
364+import os
365+
366+try:
367+    from cStringIO import StringIO
368+except ImportError:
369+    from StringIO import StringIO
370+
371+
372+class MultiPartParserError(Exception):
373+    def __init__(self, message):
374+        self.message = message
375+    def __str__(self):
376+        return repr(self.message)
377+
378+class MultiPartParser(object):
379+    """
380+    An RFC 2388 multipart/form-data parser.
381+   
382+    parse() reads the input stream in chunk_size chunks and returns a
383+    tuple of (POST MultiValueDict, FILES MultiValueDict). If
384+    file_upload_dir is defined files will be streamed to temporary
385+    files in the specified directory.
386+
387+    The FILES dictionary will have 'filename', 'content-type',
388+    'content' and 'content-length' entries. For streamed files it will
389+    also have 'tmpfilename' and 'tmpfile'. The 'content' entry will
390+    only be read from disk when referenced for streamed files.
391+
392+    If a progress ID is sent (via header or GET) and file_upload_dir is
393+    set, request.file_progress will be given a dictionary of the progress.
394+    """
395+    def __init__(self, headers, input, request, file_upload_dir=None, streaming_min_post_size=None, chunk_size=1024*64):
396+        try:
397+            content_length = int(headers['Content-Length'])
398+        except:
399+            raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length'))
400+
401+        content_type = headers.get('Content-Type')
402+
403+        if not content_type or not content_type.startswith('multipart/'):
404+            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
405+           
406+        ctype, opts = self.parse_header(content_type)
407+        boundary = opts.get('boundary')
408+        from cgi import valid_boundary
409+        if not boundary or not valid_boundary(boundary):
410+            raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary)
411+
412+        progress_id = request.META['UPLOAD_PROGRESS_ID']
413+
414+        self._track_progress = file_upload_dir and progress_id # whether or not to track progress
415+        self._boundary = '--' + boundary
416+        self._input = input
417+        self._size = content_length
418+        self._received = 0
419+        self._file_upload_dir = file_upload_dir
420+        self._chunk_size = chunk_size
421+        self._state = 'PREAMBLE'
422+        self._partial = ''
423+        self._post = MultiValueDict()
424+        self._files = MultiValueDict()
425+        self._request = request
426+
427+        if streaming_min_post_size is not None and content_length < streaming_min_post_size:
428+            self._file_upload_dir = None # disable file streaming for small request
429+        elif self._track_progress:
430+            request.file_progress = {'state': 'starting'}
431+
432+        try:
433+            # Use mx fast string search if available.
434+            from mx.TextTools import FS
435+            self._fs = FS(self._boundary)
436+        except ImportError:
437+            self._fs = None
438+
439+    def parse(self):
440+        try:
441+            self._parse()
442+        finally:
443+            if self._track_progress:
444+                self._request.file_progress = {'state': 'done'}
445+        return self._post, self._files
446+
447+    def _parse(self):
448+        size = self._size
449+
450+        try:
451+            while size > 0:
452+                n = self._read(self._input, min(self._chunk_size, size))
453+                if not n:
454+                    break
455+                size -= n
456+        except:
457+            # consume any remaining data so we don't generate a "Connection Reset" error
458+            size = self._size - self._received
459+            while size > 0:
460+                data = self._input.read(min(self._chunk_size, size))
461+                size -= len(data)
462+            raise
463+
464+    def _find_boundary(self, data, start, stop):
465+        """
466+        Find the next boundary and return the end of current part
467+        and start of next part.
468+        """
469+        if self._fs:
470+            boundary = self._fs.find(data, start, stop)
471+        else:
472+            boundary = data.find(self._boundary, start, stop)
473+        if boundary >= 0:
474+            end = boundary
475+            next = boundary + len(self._boundary)
476+
477+            # backup over CRLF
478+            if end > 0 and data[end-1] == '\n': end -= 1
479+            if end > 0 and data[end-1] == '\r': end -= 1
480+            # skip over --CRLF
481+            if next < stop and data[next] == '-': next += 1
482+            if next < stop and data[next] == '-': next += 1
483+            if next < stop and data[next] == '\r': next += 1
484+            if next < stop and data[next] == '\n': next += 1
485+
486+            return True, end, next
487+        else:
488+            return False, stop, stop
489+
490+    class TemporaryFile(object):
491+        "A temporary file that tries to delete itself when garbage collected."
492+        def __init__(self, dir):
493+            import tempfile
494+            (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
495+            self.file = os.fdopen(fd, 'w+b')
496+            self.name = name
497+
498+        def __getattr__(self, name):
499+            a = getattr(self.__dict__['file'], name)
500+            if type(a) != type(0):
501+                setattr(self, name, a)
502+            return a
503+
504+        def __del__(self):
505+            try:
506+                os.unlink(self.name)
507+            except OSError:
508+                pass
509+
510+    class LazyContent(dict):
511+        """
512+        A lazy FILES dictionary entry that reads the contents from
513+        tmpfile only when referenced.
514+        """
515+        def __init__(self, data):
516+            dict.__init__(self, data)
517+
518+        def __getitem__(self, key):
519+            if key == 'content' and not self.has_key(key):
520+                self['tmpfile'].seek(0)
521+                self['content'] = self['tmpfile'].read()
522+            return dict.__getitem__(self, key)
523+
524+    def _read(self, input, size):
525+        data = input.read(size)
526+
527+        if not data:
528+            return 0
529+
530+        read_size = len(data)
531+        self._received += read_size
532+
533+        if self._partial:
534+            data = self._partial + data
535+
536+        start = 0
537+        stop = len(data)
538+
539+        while start < stop:
540+            boundary, end, next = self._find_boundary(data, start, stop)
541+
542+            if not boundary and read_size:
543+                # make sure we don't treat a partial boundary (and its separators) as data
544+                stop -= len(self._boundary) + 16
545+                end = next = stop
546+                if end <= start:
547+                    break # need more data
548+
549+            if self._state == 'PREAMBLE':
550+                # Preamble, just ignore it
551+                self._state = 'HEADER'
552+
553+            elif self._state == 'HEADER':
554+                # Beginning of header, look for end of header and parse it if found.
555+
556+                header_end = data.find('\r\n\r\n', start, stop)
557+                if header_end == -1:
558+                    break # need more data
559+
560+                header = data[start:header_end]
561+
562+                self._fieldname = None
563+                self._filename = None
564+                self._content_type = None
565+
566+                for line in header.split('\r\n'):
567+                    ctype, opts = self.parse_header(line)
568+                    if ctype == 'content-disposition: form-data':
569+                        self._fieldname = opts.get('name')
570+                        self._filename = opts.get('filename')
571+                    elif ctype.startswith('content-type: '):
572+                        self._content_type = ctype[14:]
573+
574+                if self._filename is not None:
575+                    # cleanup filename from IE full paths:
576+                    self._filename = self._filename[self._filename.rfind("\\")+1:].strip()
577+
578+                    if self._filename: # ignore files without filenames
579+                        if self._file_upload_dir:
580+                            try:
581+                                self._file = self.TemporaryFile(dir=self._file_upload_dir)
582+                            except (OSError, IOError), e:
583+                                raise MultiPartParserError("Failed to create temporary file. Error was %s" % e)
584+                        else:
585+                            self._file = StringIO()
586+                    else:
587+                        self._file = None
588+                    self._filesize = 0
589+                    self._state = 'FILE'
590+                else:
591+                    self._field = StringIO()
592+                    self._state = 'FIELD'
593+                next = header_end + 4
594+
595+            elif self._state == 'FIELD':
596+                # In a field, collect data until a boundary is found.
597+
598+                self._field.write(data[start:end])
599+                if boundary:
600+                    if self._fieldname:
601+                        self._post.appendlist(self._fieldname, self._field.getvalue())
602+                    self._field.close()
603+                    self._state = 'HEADER'
604+
605+            elif self._state == 'FILE':
606+                # In a file, collect data until a boundary is found.
607+
608+                if self._file:
609+                    try:
610+                        self._file.write(data[start:end])
611+                    except IOError, e:
612+                        raise MultiPartParserError("Failed to write to temporary file.")
613+                    self._filesize += end-start
614+
615+                    if self._track_progress:
616+                        self._request.file_progress = {'received': self._received,
617+                                                       'size':     self._size,
618+                                                       'state':    'uploading'}
619+
620+                if boundary:
621+                    if self._file:
622+                        if self._file_upload_dir:
623+                            self._file.seek(0)
624+                            file = self.LazyContent({
625+                                'filename': self._filename,
626+                                'content-type':  self._content_type,
627+                                # 'content': is read on demand
628+                                'content-length': self._filesize,
629+                                'tmpfilename': self._file.name,
630+                                'tmpfile': self._file
631+                            })
632+                        else:
633+                            file = {
634+                                'filename': self._filename,
635+                                'content-type':  self._content_type,
636+                                'content': self._file.getvalue(),
637+                                'content-length': self._filesize
638+                            }
639+                            self._file.close()
640+
641+                        self._files.appendlist(self._fieldname, file)
642+
643+                    self._state = 'HEADER'
644+
645+            start = next
646+
647+        self._partial = data[start:]
648+
649+        return read_size
650+
651+    def parse_header(self, line):
652+        from cgi import parse_header
653+        return parse_header(line)
654diff -r 4ce69c5b8f1d -r 1c51845bdd22 django/oldforms/__init__.py
655--- a/django/oldforms/__init__.py       Tue Oct 09 21:20:32 2007 -0500
656+++ b/django/oldforms/__init__.py       Wed Oct 10 20:00:46 2007 +0000
657@@ -680,16 +680,21 @@ class FileUploadField(FormField):
658         self.validator_list = [self.isNonEmptyFile] + validator_list
659 
660     def isNonEmptyFile(self, field_data, all_data):
661-        try:
662-            content = field_data['content']
663-        except TypeError:
664+        if field_data.has_key('_file_upload_error'):
665+            raise validators.CriticalValidationError, field_data['_file_upload_error']
666+        if not field_data.has_key('filename'):
667             raise validators.CriticalValidationError, ugettext("No file was submitted. Check the encoding type on the form.")
668-        if not content:
669+        if not field_data['content-length']:
670             raise validators.CriticalValidationError, ugettext("The submitted file is empty.")
671 
672     def render(self, data):
673         return u'<input type="file" id="%s" class="v%s" name="%s" />' % \
674             (self.get_id(), self.__class__.__name__, self.field_name)
675+
676+    def prepare(self, new_data):
677+        if new_data.has_key('_file_upload_error'):
678+            # pretend we got something in the field to raise a validation error later
679+            new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] }
680 
681     def html2python(data):
682         if data is None:
683diff -r 4ce69c5b8f1d -r 1c51845bdd22 django/utils/file.py
684--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
685+++ b/django/utils/file.py      Wed Oct 10 20:00:46 2007 +0000
686@@ -0,0 +1,53 @@
687+import os
688+
689+__all__ = ['file_move_safe']
690+
691+try:
692+    import shutil
693+    file_move = shutil.move
694+except ImportError:
695+    file_move = os.rename
696+
697+def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64, allow_overwrite=False):
698+    """
699+    Moves a file from one location to another in the safest way possible.
700+   
701+    First, it tries using shutil.move, which is OS-dependent but doesn't
702+    break with change of filesystems. Then it tries os.rename, which will
703+    break if it encounters a change in filesystems. Lastly, it streams
704+    it manually from one file to another in python.
705+
706+    Without ``allow_overwrite``, if the destination file exists, an
707+    IOError is raised.
708+    """
709+
710+    from django.utils import file_locks
711+
712+    if old_file_name == new_file_name:
713+        # No file moving takes place.
714+        return
715+
716+    if not allow_overwrite and os.path.exists(new_file_name):
717+        raise IOError, "Django does not allow overwriting files."
718+
719+    try:
720+        file_move(old_file_name, new_file_name)
721+        return
722+    except OSError: # moving to another filesystem
723+        pass
724+
725+    new_file = open(new_file_name, 'wb')
726+    # exclusive lock
727+    file_locks.lock(new_file, file_locks.LOCK_EX)
728+    old_file = open(old_file_name, 'rb')
729+    current_chunk = None
730+
731+    while current_chunk != '':
732+        current_chunk = old_file.read(chunk_size)
733+        new_file.write(current_chunk)
734+
735+    new_file.close()
736+    old_file.close()
737+
738+    os.remove(old_file_name)
739+
740diff -r 4ce69c5b8f1d -r 1c51845bdd22 django/utils/file_locks.py
741--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
742+++ b/django/utils/file_locks.py        Wed Oct 10 20:00:46 2007 +0000
743@@ -0,0 +1,50 @@
744+"""
745+Locking portability by Jonathan Feinberg <jdf@pobox.com> in the Python Cookbook
746+
747+Example Usage::
748+
749+    from django.utils import file_locks
750+
751+    f = open('./file', 'wb')
752+
753+    file_locks.lock(f, file_locks.LOCK_EX)
754+    f.write('Django')
755+    f.close()
756+"""
757+
758+
759+import os
760+
761+__all__ = ['LOCK_EX','LOCK_SH','LOCK_NB','lock','unlock']
762+
763+if os.name == 'nt':
764+       import win32con
765+       import win32file
766+       import pywintypes
767+       LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
768+       LOCK_SH = 0
769+       LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
770+       __overlapped = pywintypes.OVERLAPPED()
771+elif os.name == 'posix':
772+       import fcntl
773+       LOCK_EX = fcntl.LOCK_EX
774+       LOCK_SH = fcntl.LOCK_SH
775+       LOCK_NB = fcntl.LOCK_NB
776+else:
777+       raise RuntimeError("Locking only defined for nt and posix platforms")
778+
779+if os.name == 'nt':
780+       def lock(file, flags):
781+               hfile = win32file._get_osfhandle(file.fileno())
782+               win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped)
783+
784+       def unlock(file):
785+               hfile = win32file._get_osfhandle(file.fileno())
786+               win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped)
787+
788+elif os.name =='posix':
789+       def lock(file, flags):
790+               fcntl.flock(file.fileno(), flags)
791+
792+       def unlock(file):
793+               fcntl.flock(file.fileno(), fcntl.LOCK_UN)
794diff -r 4ce69c5b8f1d -r 1c51845bdd22 docs/forms.txt
795--- a/docs/forms.txt    Tue Oct 09 21:20:32 2007 -0500
796+++ b/docs/forms.txt    Wed Oct 10 20:00:46 2007 +0000
797@@ -475,6 +475,19 @@ this::
798    new_data = request.POST.copy()
799    new_data.update(request.FILES)
800 
801+Streaming file uploads
802+----------------------
803+
804+File uploads are read into memory by default. This works fine for
805+small to medium-sized uploads (from 1MB to 100MB, depending on your
806+setup and usage). To support larger uploads, you can enable upload
807+streaming, where only a small part of the file is held in memory at
808+any time. To do this, specify the ``FILE_UPLOAD_DIR`` setting (see
809+the settings_ document for more details).
810+
811+See the `request object`_ documentation for details about
812+``request.FILES`` objects when streaming file uploads are enabled.
813+
814 Validators
815 ==========
816 
817@@ -698,3 +711,4 @@ fails. If no message is passed in, a def
818 .. _`generic views`: ../generic_views/
819 .. _`models API`: ../model-api/
820 .. _settings: ../settings/
821+.. _request object: ../request_response/#httprequest-objects
822diff -r 4ce69c5b8f1d -r 1c51845bdd22 docs/request_response.txt
823--- a/docs/request_response.txt Tue Oct 09 21:20:32 2007 -0500
824+++ b/docs/request_response.txt Wed Oct 10 20:00:46 2007 +0000
825@@ -82,12 +82,24 @@ All attributes except ``session`` should
826 ``FILES``
827     A dictionary-like object containing all uploaded files. Each key in
828     ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
829-    value in ``FILES`` is a standard Python dictionary with the following three
830+    value in ``FILES`` is a standard Python dictionary with the following four
831     keys:
832 
833         * ``filename`` -- The name of the uploaded file, as a Python string.
834         * ``content-type`` -- The content type of the uploaded file.
835         * ``content`` -- The raw content of the uploaded file.
836+        * ``content-length`` -- The length of the content in bytes.
837+
838+    If streaming file uploads are enabled, two additional keys
839+    describing the uploaded file will be present:
840+
841+        * ``tmpfilename`` -- The filename for the temporary file.
842+        * ``tmpfile`` -- An open file object for the temporary file.
843+
844+    The temporary file will be removed when the request finishes.
845+
846+    Note that accessing ``content`` when streaming uploads are enabled
847+    will read the whole file into memory, which may not be what you want.
848 
849     Note that ``FILES`` will only contain data if the request method was POST
850     and the ``<form>`` that posted to the request had
851diff -r 4ce69c5b8f1d -r 1c51845bdd22 docs/settings.txt
852--- a/docs/settings.txt Tue Oct 09 21:20:32 2007 -0500
853+++ b/docs/settings.txt Wed Oct 10 20:00:46 2007 +0000
854@@ -479,6 +479,15 @@ these paths should use Unix-style forwar
855 `Testing Django Applications`_.
856 
857 .. _Testing Django Applications: ../testing/
858+
859+FILE_UPLOAD_DIR
860+---------------
861+
862+Default: ``None``
863+
864+Path to a directory where temporary files should be written during
865+file uploads. Leaving this as ``None`` will disable streaming file uploads,
866+and cause all uploaded files to be stored (temporarily) in memory.
867 
868 IGNORABLE_404_ENDS
869 ------------------
870@@ -822,6 +831,16 @@ See the `site framework docs`_.
871 
872 .. _site framework docs: ../sites/
873 
874+STREAMING_MIN_POST_SIZE
875+-----------------------
876+
877+Default: 524288 (``512*1024``)
878+
879+An integer specifying the minimum number of bytes that has to be
880+received (in a POST) for file upload streaming to take place. Any
881+request smaller than this will be handled in memory.
882+Note: ``FILE_UPLOAD_DIR`` has to be defined to enable streaming.
883+
884 TEMPLATE_CONTEXT_PROCESSORS
885 ---------------------------
886 
887diff -r 4ce69c5b8f1d -r 1c51845bdd22 tests/modeltests/test_client/models.py
888--- a/tests/modeltests/test_client/models.py    Tue Oct 09 21:20:32 2007 -0500
889+++ b/tests/modeltests/test_client/models.py    Wed Oct 10 20:00:46 2007 +0000
890@@ -4,7 +4,7 @@ 38. Testing using the Test Client
891 
892 The test client is a class that can act like a simple
893 browser for testing purposes.
894
895+
896 It allows the user to compose GET and POST requests, and
897 obtain the response that the server gave to those requests.
898 The server Response objects are annotated with the details
899@@ -80,6 +80,21 @@ class ClientTest(TestCase):
900         self.assertEqual(response.template.name, "Book template")
901         self.assertEqual(response.content, "Blink - Malcolm Gladwell")
902 
903+    def test_post_file_view(self):
904+        "POST this python file to a view"
905+        import os, tempfile
906+        from django.conf import settings
907+        file = __file__.replace('.pyc', '.py')
908+        for upload_dir, streaming_size in [(None,512*1000), (tempfile.gettempdir(), 1)]:
909+            settings.FILE_UPLOAD_DIR = upload_dir
910+            settings.STREAMING_MIN_POST_SIZE = streaming_size
911+            post_data = { 'name': file, 'file_file': open(file) }
912+            response = self.client.post('/test_client/post_file_view/', post_data)
913+            self.failUnless('models.py' in response.context['file']['filename'])
914+            self.failUnless(len(response.context['file']['content']) == os.path.getsize(file))
915+            if upload_dir:
916+                self.failUnless(response.context['file']['tmpfilename'])
917+
918     def test_redirect(self):
919         "GET a URL that redirects elsewhere"
920         response = self.client.get('/test_client/redirect_view/')
921diff -r 4ce69c5b8f1d -r 1c51845bdd22 tests/modeltests/test_client/urls.py
922--- a/tests/modeltests/test_client/urls.py      Tue Oct 09 21:20:32 2007 -0500
923+++ b/tests/modeltests/test_client/urls.py      Wed Oct 10 20:00:46 2007 +0000
924@@ -5,6 +5,7 @@ urlpatterns = patterns('',
925 urlpatterns = patterns('',
926     (r'^get_view/$', views.get_view),
927     (r'^post_view/$', views.post_view),
928+    (r'^post_file_view/$', views.post_file_view),
929     (r'^raw_post_view/$', views.raw_post_view),
930     (r'^redirect_view/$', views.redirect_view),
931     (r'^permanent_redirect_view/$', redirect_to, { 'url': '/test_client/get_view/' }),
932diff -r 4ce69c5b8f1d -r 1c51845bdd22 tests/modeltests/test_client/views.py
933--- a/tests/modeltests/test_client/views.py     Tue Oct 09 21:20:32 2007 -0500
934+++ b/tests/modeltests/test_client/views.py     Wed Oct 10 20:00:46 2007 +0000
935@@ -45,6 +45,12 @@ def raw_post_view(request):
936         t = Template("GET request.", name="Book GET template")
937         c = Context()
938 
939+    return HttpResponse(t.render(c))
940+
941+def post_file_view(request):
942+    "A view that expects a multipart post and returns a file in the context"
943+    t = Template('File {{ file.filename }} received', name='POST Template')
944+    c = Context({'file': request.FILES['file_file']})
945     return HttpResponse(t.render(c))
946 
947 def redirect_view(request):