Ticket #2070: 5343_streaming_file_upload_best.diff

File 5343_streaming_file_upload_best.diff, 38.4 KB (added by Michael Axiak <axiak@…>, 8 years ago)

Sorry about that -- this one is the better one.

Line 
1Index: django/http/multipartparser.py
2===================================================================
3--- django/http/multipartparser.py      (revision 0)
4+++ django/http/multipartparser.py      (revision 0)
5@@ -0,0 +1,327 @@
6+"""
7+MultiPart parsing for file uploads.
8+If a progress id is sent (either through the ``X-Progress-ID``
9+header or the ``progress_id`` GET parameter) and ``FILE_UPLOAD_DIR``
10+is set in the settings, then the file progress will be tracked using
11+``request.file_progress``.
12+
13+To use this feature, consider creating a middleware with an appropriate
14+``process_request``::
15+
16+    class FileProgressTrack(object):
17+        def __get__(self, request, HttpRequest):
18+            progress_id = request.META['UPLOAD_PROGRESS_ID']
19+            status = # get progress from progress_id here
20+
21+            return status
22+
23+        def __set__(self, request, new_value):
24+            progress_id = request.META['UPLOAD_PROGRESS_ID']
25+
26+            # set the progress using progress_id here.
27+
28+    # example middleware
29+    class FileProgressExample(object):
30+        def process_request(self, request):
31+            request.__class__.file_progress = FileProgressTrack()
32+
33+
34+
35+"""
36+
37+__all__ = ['MultiPartParserError','MultiPartParser']
38+
39+
40+from django.utils.datastructures import MultiValueDict
41+import os
42+
43+try:
44+    from cStringIO import StringIO
45+except ImportError:
46+    from StringIO import StringIO
47+
48+
49+class MultiPartParserError(Exception):
50+    def __init__(self, message):
51+        self.message = message
52+    def __str__(self):
53+        return repr(self.message)
54+
55+class MultiPartParser(object):
56+    """
57+    An RFC 2388 multipart/form-data parser.
58+   
59+    parse() reads the input stream in chunk_size chunks and returns a
60+    tuple of (POST MultiValueDict, FILES MultiValueDict). If
61+    file_upload_dir is defined files will be streamed to temporary
62+    files in the specified directory.
63+
64+    The FILES dictionary will have 'filename', 'content-type',
65+    'content' and 'content-length' entries. For streamed files it will
66+    also have 'tmpfilename' and 'tmpfile'. The 'content' entry will
67+    only be read from disk when referenced for streamed files.
68+
69+    If a progress id is sent with the request and file_upload_dir is
70+    set, request.file_progress is updated with a dictionary of the progress.
71+    """
72+    def __init__(self, headers, input, request, file_upload_dir=None, streaming_min_post_size=None, chunk_size=1024*64):
73+        try:
74+            content_length = int(headers['Content-Length'])
75+        except (KeyError, TypeError, ValueError): # header missing or not an integer
76+            raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length'))
77+
78+        content_type = headers.get('Content-Type')
79+
80+        if not content_type or not content_type.startswith('multipart/'):
81+            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
82+
83+        ctype, opts = self.parse_header(content_type)
84+        boundary = opts.get('boundary')
85+        from cgi import valid_boundary
86+        if not boundary or not valid_boundary(boundary):
87+            raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary)
88+
89+        progress_id = request.META.get('UPLOAD_PROGRESS_ID') # None if the handler did not set it
90+
91+        self._track_progress = file_upload_dir and progress_id # whether or not to track progress
92+        self._boundary = '--' + boundary
93+        self._input = input
94+        self._size = content_length
95+        self._received = 0
96+        self._file_upload_dir = file_upload_dir
97+        self._chunk_size = chunk_size
98+        self._state = 'PREAMBLE'
99+        self._partial = ''
100+        self._post = MultiValueDict()
101+        self._files = MultiValueDict()
102+        self._request = request
103+
104+        if streaming_min_post_size is not None and content_length < streaming_min_post_size:
105+            self._file_upload_dir = None # disable file streaming for small request
106+        elif self._track_progress:
107+            request.file_progress = {'state': 'starting'}
108+
109+        try:
110+            # Use mx fast string search if available.
111+            from mx.TextTools import FS
112+            self._fs = FS(self._boundary)
113+        except ImportError:
114+            self._fs = None
115+
116+    def parse(self):
117+        try:
118+            self._parse()
119+        finally:
120+            if self._track_progress:
121+                self._request.file_progress = {'state': 'done'}
122+        return self._post, self._files
123+
124+    def _parse(self):
125+        size = self._size
126+
127+        try:
128+            while size > 0:
129+                n = self._read(self._input, min(self._chunk_size, size))
130+                if not n:
131+                    break
132+                size -= n
133+        except:
134+            # consume any remaining data so we don't generate a "Connection Reset" error
135+            size = self._size - self._received
136+            while size > 0:
137+                data = self._input.read(min(self._chunk_size, size))
138+                size -= len(data)
139+            raise
140+
141+    def _find_boundary(self, data, start, stop):
142+        """
143+        Find the next boundary and return the end of current part
144+        and start of next part.
145+        """
146+        if self._fs:
147+            boundary = self._fs.find(data, start, stop)
148+        else:
149+            boundary = data.find(self._boundary, start, stop)
150+        if boundary >= 0:
151+            end = boundary
152+            next = boundary + len(self._boundary)
153+
154+            # backup over CRLF
155+            if end > 0 and data[end-1] == '\n': end -= 1
156+            if end > 0 and data[end-1] == '\r': end -= 1
157+            # skip over --CRLF
158+            if next < stop and data[next] == '-': next += 1
159+            if next < stop and data[next] == '-': next += 1
160+            if next < stop and data[next] == '\r': next += 1
161+            if next < stop and data[next] == '\n': next += 1
162+
163+            return True, end, next
164+        else:
165+            return False, stop, stop
166+
167+    class TemporaryFile(object):
168+        "A temporary file that tries to delete itself when garbage collected."
169+        def __init__(self, dir):
170+            import tempfile
171+            (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
172+            self.file = os.fdopen(fd, 'w+b')
173+            self.name = name
174+
175+        def __getattr__(self, name):
176+            a = getattr(self.__dict__['file'], name)
177+            if type(a) != type(0):
178+                setattr(self, name, a)
179+            return a
180+
181+        def __del__(self):
182+            try:
183+                os.unlink(self.name)
184+            except OSError:
185+                pass
186+
187+    class LazyContent(dict):
188+        """
189+        A lazy FILES dictionary entry that reads the contents from
190+        tmpfile only when referenced.
191+        """
192+        def __init__(self, data):
193+            dict.__init__(self, data)
194+
195+        def __getitem__(self, key):
196+            if key == 'content' and not self.has_key(key):
197+                self['tmpfile'].seek(0)
198+                self['content'] = self['tmpfile'].read()
199+            return dict.__getitem__(self, key)
200+
201+    def _read(self, input, size):
202+        data = input.read(size)
203+
204+        if not data:
205+            return 0
206+
207+        read_size = len(data)
208+        self._received += read_size
209+
210+        if self._partial:
211+            data = self._partial + data
212+
213+        start = 0
214+        stop = len(data)
215+
216+        while start < stop:
217+            boundary, end, next = self._find_boundary(data, start, stop)
218+
219+            if not boundary and read_size:
220+                # make sure we don't treat a partial boundary (and its separators) as data
221+                stop -= len(self._boundary) + 16
222+                end = next = stop
223+                if end <= start:
224+                    break # need more data
225+
226+            if self._state == 'PREAMBLE':
227+                # Preamble, just ignore it
228+                self._state = 'HEADER'
229+
230+            elif self._state == 'HEADER':
231+                # Beginning of header, look for end of header and parse it if found.
232+
233+                header_end = data.find('\r\n\r\n', start, stop)
234+                if header_end == -1:
235+                    break # need more data
236+
237+                header = data[start:header_end]
238+
239+                self._fieldname = None
240+                self._filename = None
241+                self._content_type = None
242+
243+                for line in header.split('\r\n'):
244+                    ctype, opts = self.parse_header(line)
245+                    if ctype == 'content-disposition: form-data':
246+                        self._fieldname = opts.get('name')
247+                        self._filename = opts.get('filename')
248+                    elif ctype.startswith('content-type: '):
249+                        self._content_type = ctype[14:]
250+
251+                if self._filename is not None:
252+                    # cleanup filename from IE full paths:
253+                    self._filename = self._filename[self._filename.rfind("\\")+1:].strip()
254+
255+                    if self._filename: # ignore files without filenames
256+                        if self._file_upload_dir:
257+
258+                            try:
259+                                self._file = self.TemporaryFile(dir=self._file_upload_dir)
260+                            except (OSError, IOError), e: # tuple catches both; 'e' is used in the message below
261+                                raise MultiPartParserError("Failed to create temporary file. Error was %s" % e)
262+                        else:
263+                            self._file = StringIO()
264+                    else:
265+                        self._file = None
266+                    self._filesize = 0
267+                    self._state = 'FILE'
268+                else:
269+                    self._field = StringIO()
270+                    self._state = 'FIELD'
271+                next = header_end + 4
272+
273+            elif self._state == 'FIELD':
274+                # In a field, collect data until a boundary is found.
275+
276+                self._field.write(data[start:end])
277+                if boundary:
278+                    if self._fieldname:
279+                        self._post.appendlist(self._fieldname, self._field.getvalue())
280+                    self._field.close()
281+                    self._state = 'HEADER'
282+
283+            elif self._state == 'FILE':
284+                # In a file, collect data until a boundary is found.
285+
286+                if self._file:
287+                    try:
288+                        self._file.write(data[start:end])
289+                    except IOError, e:
290+                        raise MultiPartParserError("Failed to write to temporary file.")
291+                    self._filesize += end-start
292+
293+                    if self._track_progress:
294+                        self._request.file_progress = {'received': self._received,
295+                                                       'size':     self._size,
296+                                                       'state':    'uploading'}
297+
298+                if boundary:
299+                    if self._file:
300+                        if self._file_upload_dir:
301+                            self._file.seek(0)
302+                            file = self.LazyContent({
303+                                'filename': self._filename,
304+                                'content-type':  self._content_type,
305+                                # 'content': is read on demand
306+                                'content-length': self._filesize,
307+                                'tmpfilename': self._file.name,
308+                                'tmpfile': self._file
309+                            })
310+                        else:
311+                            file = {
312+                                'filename': self._filename,
313+                                'content-type':  self._content_type,
314+                                'content': self._file.getvalue(),
315+                                'content-length': self._filesize
316+                            }
317+                            self._file.close()
318+
319+                        self._files.appendlist(self._fieldname, file)
320+
321+                    self._state = 'HEADER'
322+
323+            start = next
324+
325+        self._partial = data[start:]
326+
327+        return read_size
328+
329+    def parse_header(self, line):
330+        from cgi import parse_header
331+        return parse_header(line)
332Index: django/http/__init__.py
333===================================================================
334--- django/http/__init__.py     (revision 5343)
335+++ django/http/__init__.py     (working copy)
336@@ -1,9 +1,13 @@
337-import os
338+import os, pickle
339 from Cookie import SimpleCookie
340 from pprint import pformat
341 from urllib import urlencode, quote
342 from django.utils.datastructures import MultiValueDict
343+from django.http.multipartparser import MultiPartParser, MultiPartParserError
344+import re
345 
346+upload_id_re = re.compile(r'^[a-fA-F0-9]{32}$') # file progress id Regular expression
347+
348 RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
349 
350 try:
351@@ -17,6 +21,7 @@
352 
353 class HttpRequest(object):
354     "A basic HTTP request"
355+
356     def __init__(self):
357         self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {}
358         self.path = ''
359@@ -42,37 +47,55 @@
360     def is_secure(self):
361         return os.environ.get("HTTPS") == "on"
362 
363-def parse_file_upload(header_dict, post_data):
364-    "Returns a tuple of (POST MultiValueDict, FILES MultiValueDict)"
365-    import email, email.Message
366-    from cgi import parse_header
367-    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
368-    raw_message += '\r\n\r\n' + post_data
369-    msg = email.message_from_string(raw_message)
370-    POST = MultiValueDict()
371-    FILES = MultiValueDict()
372-    for submessage in msg.get_payload():
373-        if submessage and isinstance(submessage, email.Message.Message):
374-            name_dict = parse_header(submessage['Content-Disposition'])[1]
375-            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
376-            # or {'name': 'blah'} for POST fields
377-            # We assume all uploaded files have a 'filename' set.
378-            if 'filename' in name_dict:
379-                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
380-                if not name_dict['filename'].strip():
381-                    continue
382-                # IE submits the full path, so trim everything but the basename.
383-                # (We can't use os.path.basename because it expects Linux paths.)
384-                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
385-                FILES.appendlist(name_dict['name'], {
386-                    'filename': filename,
387-                    'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
388-                    'content': submessage.get_payload(),
389-                })
390-            else:
391-                POST.appendlist(name_dict['name'], submessage.get_payload())
392-    return POST, FILES
393+    def _get_file_progress(self):
394+        return {}
395 
396+    def _set_file_progress(self,value):
397+        pass
398+
399+    def _del_file_progress(self):
400+        pass
401+
402+    file_progress = property(_get_file_progress,
403+                             _set_file_progress,
404+                             _del_file_progress)
405+
406+    def _get_file_progress_from_args(self, headers, get, querystring):
407+        """
408+        This parses the request for a file progress_id value.
409+        Note that there are two distinct ways of getting the progress
410+        ID -- header and GET. One is used primarily to attach via JavaScript
411+        to the end of an HTML form action while the other is used for AJAX
412+        communication.
413+
414+        All progress IDs must be valid 32-digit hexadecimal numbers.
415+        """
416+        if 'X-Progress-ID' in headers:
417+            progress_id = headers['X-Progress-ID'] # read the same key that was just tested
418+        elif 'progress_id' in get:
419+            progress_id = get['progress_id']
420+        else:
421+            return None
422+
423+        if not upload_id_re.match(progress_id): # upload_id_re is module-level, not an attribute
424+            return None
425+
426+        return progress_id
427+
428+def parse_file_upload(headers, input, request):
429+    from django.conf import settings
430+
431+    # Only stream files to disk if FILE_UPLOAD_DIR is set
432+    file_upload_dir = settings.FILE_UPLOAD_DIR
433+    streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE
434+
435+    try:
436+        parser = MultiPartParser(headers, input, request, file_upload_dir, streaming_min_post_size)
437+        return parser.parse()
438+    except MultiPartParserError, e:
439+        return MultiValueDict({ '_file_upload_error': [e.message] }), {}
440+
441+
442 class QueryDict(MultiValueDict):
443     """A specialized MultiValueDict that takes a query string when initialized.
444     This is immutable unless you create a copy of it."""
445Index: django/oldforms/__init__.py
446===================================================================
447--- django/oldforms/__init__.py (revision 5343)
448+++ django/oldforms/__init__.py (working copy)
449@@ -666,17 +666,22 @@
450         self.validator_list = [self.isNonEmptyFile] + validator_list
451 
452     def isNonEmptyFile(self, field_data, all_data):
453-        try:
454-            content = field_data['content']
455-        except TypeError:
456+        if field_data.has_key('_file_upload_error'):
457+            raise validators.CriticalValidationError, field_data['_file_upload_error']
458+        if not field_data.has_key('filename'):
459             raise validators.CriticalValidationError, gettext("No file was submitted. Check the encoding type on the form.")
460-        if not content:
461+        if not field_data['content-length']:
462             raise validators.CriticalValidationError, gettext("The submitted file is empty.")
463 
464     def render(self, data):
465         return '<input type="file" id="%s" class="v%s" name="%s" />' % \
466             (self.get_id(), self.__class__.__name__, self.field_name)
467 
468+    def prepare(self, new_data):
469+        if new_data.has_key('_file_upload_error'):
470+            # pretend we got something in the field to raise a validation error later
471+            new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] }
472+
473     def html2python(data):
474         if data is None:
475             raise EmptyValue
476Index: django/db/models/base.py
477===================================================================
478--- django/db/models/base.py    (revision 5343)
479+++ django/db/models/base.py    (working copy)
480@@ -12,12 +12,14 @@
481 from django.dispatch import dispatcher
482 from django.utils.datastructures import SortedDict
483 from django.utils.functional import curry
484+from django.utils.file import file_move_safe
485 from django.conf import settings
486 from itertools import izip
487 import types
488 import sys
489 import os
490 
491+               
492 class ModelBase(type):
493     "Metaclass for all models"
494     def __new__(cls, name, bases, attrs):
495@@ -361,12 +363,16 @@
496     def _get_FIELD_size(self, field):
497         return os.path.getsize(self._get_FIELD_filename(field))
498 
499-    def _save_FIELD_file(self, field, filename, raw_contents, save=True):
500+    def _save_FIELD_file(self, field, filename, raw_field, save=True):
501         directory = field.get_directory_name()
502         try: # Create the date-based directory if it doesn't exist.
503             os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
504         except OSError: # Directory probably already exists.
505             pass
506+
507+        if filename is None:
508+            filename = raw_field['filename']
509+
510         filename = field.get_filename(filename)
511 
512         # If the filename already exists, keep adding an underscore to the name of
513@@ -383,9 +389,16 @@
514         setattr(self, field.attname, filename)
515 
516         full_filename = self._get_FIELD_filename(field)
517-        fp = open(full_filename, 'wb')
518-        fp.write(raw_contents)
519-        fp.close()
520+        if raw_field.has_key('tmpfilename'):
521+            raw_field['tmpfile'].close()
522+            file_move_safe(raw_field['tmpfilename'], full_filename)
523+        else:
524+            from django.utils import file_locks
525+            fp = open(full_filename, 'wb')
526+            # exclusive lock
527+            file_locks.lock(fp, file_locks.LOCK_EX)
528+            fp.write(raw_field['content'])
529+            fp.close()
530 
531         # Save the width and/or height, if applicable.
532         if isinstance(field, ImageField) and (field.width_field or field.height_field):
533Index: django/db/models/fields/__init__.py
534===================================================================
535--- django/db/models/fields/__init__.py (revision 5343)
536+++ django/db/models/fields/__init__.py (working copy)
537@@ -701,7 +701,8 @@
538         setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
539         setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
540         setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
541-        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
542+        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
543+        setattr(cls, 'move_%s_file' % self.name, lambda instance, raw_field, save=True: instance._save_FIELD_file(self, None, raw_field, save))       
544         dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
545 
546     def delete_file(self, instance):
547@@ -724,9 +725,9 @@
548         if new_data.get(upload_field_name, False):
549             func = getattr(new_object, 'save_%s_file' % self.name)
550             if rel:
551-                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
552+                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save)
553             else:
554-                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
555+                func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save)
556 
557     def get_directory_name(self):
558         return os.path.normpath(datetime.datetime.now().strftime(self.upload_to))
559Index: django/conf/global_settings.py
560===================================================================
561--- django/conf/global_settings.py      (revision 5343)
562+++ django/conf/global_settings.py      (working copy)
563@@ -242,6 +242,16 @@
564 # isExistingURL validator.
565 URL_VALIDATOR_USER_AGENT = "Django/0.96pre (http://www.djangoproject.com)"
566 
567+# The directory to place streamed file uploads. The web server needs write
568+# permissions on this directory.
569+# If this is None, streaming uploads are disabled.
570+FILE_UPLOAD_DIR = None
571+
572+# The minimum size of a POST before file uploads are streamed to disk.
573+# POSTs smaller than this are kept in memory instead.
574+# Size is in bytes.
575+STREAMING_MIN_POST_SIZE = 512 * (2**10)
576+
577 ##############
578 # MIDDLEWARE #
579 ##############
580Index: django/core/handlers/wsgi.py
581===================================================================
582--- django/core/handlers/wsgi.py        (revision 5343)
583+++ django/core/handlers/wsgi.py        (working copy)
584@@ -75,6 +75,7 @@
585         self.environ = environ
586         self.path = environ['PATH_INFO']
587         self.META = environ
588+        self.META['UPLOAD_PROGRESS_ID'] = self._get_file_progress_id()
589         self.method = environ['REQUEST_METHOD'].upper()
590 
591     def __repr__(self):
592@@ -111,7 +112,14 @@
593             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
594                 header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
595                 header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
596-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
597+                header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '')
598+                header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '')
599+                try:
600+                    self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'], self)
601+                except:
602+                    self._post, self._files = {}, {} # make sure we dont read the input stream again
603+                    raise
604+                self._raw_post_data = None # raw data is not available for streamed multipart messages
605             else:
606                 self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
607         else:
608@@ -167,6 +175,17 @@
609             buf.close()
610             return self._raw_post_data
611 
612+    def _get_file_progress_id(self):
613+        """
614+        Returns the Progress ID of the request,
615+        usually provided if there is a file upload
616+        going on.
617+        Returns ``None`` if no progress ID is specified.
618+        """
619+        return self._get_file_progress_from_args(self.environ,
620+                                                 self.GET,
621+                                                 self.environ.get('QUERY_STRING', ''))
622+
623     GET = property(_get_get, _set_get)
624     POST = property(_get_post, _set_post)
625     COOKIES = property(_get_cookies, _set_cookies)
626Index: django/core/handlers/base.py
627===================================================================
628--- django/core/handlers/base.py        (revision 5343)
629+++ django/core/handlers/base.py        (working copy)
630@@ -1,7 +1,7 @@
631 from django.core import signals
632 from django.dispatch import dispatcher
633 from django import http
634-import sys
635+import sys, re
636 
637 class BaseHandler(object):
638     def __init__(self):
639@@ -129,3 +129,5 @@
640         "Helper function to return the traceback as a string"
641         import traceback
642         return '\n'.join(traceback.format_exception(*(exc_info or sys.exc_info())))
643+
644+
645Index: django/core/handlers/modpython.py
646===================================================================
647--- django/core/handlers/modpython.py   (revision 5343)
648+++ django/core/handlers/modpython.py   (working copy)
649@@ -47,7 +47,12 @@
650     def _load_post_and_files(self):
651         "Populates self._post and self._files"
652         if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
653-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
654+            self._raw_post_data = None # raw data is not available for streamed multipart messages
655+            try:
656+                self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req, self)
657+            except:
658+                self._post, self._files = {}, {} # make sure we dont read the input stream again
659+                raise
660         else:
661             self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
662 
663@@ -92,20 +97,21 @@
664                 'AUTH_TYPE':         self._req.ap_auth_type,
665                 'CONTENT_LENGTH':    self._req.clength, # This may be wrong
666                 'CONTENT_TYPE':      self._req.content_type, # This may be wrong
667-                'GATEWAY_INTERFACE': 'CGI/1.1',
668-                'PATH_INFO':         self._req.path_info,
669-                'PATH_TRANSLATED':   None, # Not supported
670-                'QUERY_STRING':      self._req.args,
671-                'REMOTE_ADDR':       self._req.connection.remote_ip,
672-                'REMOTE_HOST':       None, # DNS lookups not supported
673-                'REMOTE_IDENT':      self._req.connection.remote_logname,
674-                'REMOTE_USER':       self._req.user,
675-                'REQUEST_METHOD':    self._req.method,
676-                'SCRIPT_NAME':       None, # Not supported
677-                'SERVER_NAME':       self._req.server.server_hostname,
678-                'SERVER_PORT':       self._req.server.port,
679-                'SERVER_PROTOCOL':   self._req.protocol,
680-                'SERVER_SOFTWARE':   'mod_python'
681+                'GATEWAY_INTERFACE':  'CGI/1.1',
682+                'PATH_INFO':          self._req.path_info,
683+                'PATH_TRANSLATED':    None, # Not supported
684+                'QUERY_STRING':       self._req.args,
685+                'REMOTE_ADDR':        self._req.connection.remote_ip,
686+                'REMOTE_HOST':        None, # DNS lookups not supported
687+                'REMOTE_IDENT':       self._req.connection.remote_logname,
688+                'REMOTE_USER':        self._req.user,
689+                'REQUEST_METHOD':     self._req.method,
690+                'SCRIPT_NAME':        None, # Not supported
691+                'SERVER_NAME':        self._req.server.server_hostname,
692+                'SERVER_PORT':        self._req.server.port,
693+                'SERVER_PROTOCOL':    self._req.protocol,
694+                'UPLOAD_PROGRESS_ID': self._get_file_progress_id(),
695+                'SERVER_SOFTWARE':    'mod_python'
696             }
697             for key, value in self._req.headers_in.items():
698                 key = 'HTTP_' + key.upper().replace('-', '_')
699@@ -122,6 +128,17 @@
700     def _get_method(self):
701         return self.META['REQUEST_METHOD'].upper()
702 
703+    def _get_file_progress_id(self):
704+        """
705+        Returns the Progress ID of the request,
706+        usually provided if there is a file upload
707+        going on.
708+        Returns ``None`` if no progress ID is specified.
709+        """
710+        return self._get_file_progress_from_args(self._req.headers_in,
711+                                                 self.GET,
712+                                                 self._req.args)
713+
714     GET = property(_get_get, _set_get)
715     POST = property(_get_post, _set_post)
716     COOKIES = property(_get_cookies, _set_cookies)
717Index: django/utils/file_locks.py
718===================================================================
719--- django/utils/file_locks.py  (revision 0)
720+++ django/utils/file_locks.py  (revision 0)
721@@ -0,0 +1,50 @@
722+"""
723+Locking portability by Jonathan Feinberg <jdf@pobox.com> in the Python Cookbook
724+
725+Example Usage::
726+
727+    from django.utils import file_locks
728+
729+    f = open('./file', 'wb')
730+
731+    file_locks.lock(f, file_locks.LOCK_EX)
732+    f.write('Django')
733+    f.close()
734+"""
735+
736+
737+import os
738+
739+__all__ = ['LOCK_EX','LOCK_SH','LOCK_NB','lock','unlock']
740+
741+if os.name == 'nt':
742+       import win32con
743+       import win32file
744+       import pywintypes
745+       LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
746+       LOCK_SH = 0
747+       LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
748+       __overlapped = pywintypes.OVERLAPPED()
749+elif os.name == 'posix':
750+       import fcntl
751+       LOCK_EX = fcntl.LOCK_EX
752+       LOCK_SH = fcntl.LOCK_SH
753+       LOCK_NB = fcntl.LOCK_NB
754+else:
755+       raise RuntimeError("Locking only defined for nt and posix platforms")
756+
757+if os.name == 'nt':
758+       def lock(file, flags):
759+               hfile = win32file._get_osfhandle(file.fileno())
760+               win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped)
761+
762+       def unlock(file):
763+               hfile = win32file._get_osfhandle(file.fileno())
764+               win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped)
765+
766+elif os.name =='posix':
767+       def lock(file, flags):
768+               fcntl.flock(file.fileno(), flags)
769+
770+       def unlock(file):
771+               fcntl.flock(file.fileno(), fcntl.LOCK_UN)
772Index: django/utils/file.py
773===================================================================
774--- django/utils/file.py        (revision 0)
775+++ django/utils/file.py        (revision 0)
776@@ -0,0 +1,53 @@
777+import os
778+
779+__all__ = ['file_move_safe']
780+
781+try:
782+    import shutil
783+    file_move = shutil.move
784+except ImportError:
785+    file_move = os.rename
786+
787+def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64, allow_overwrite=False):
788+    """
789+    Moves a file from one location to another in the safest way possible.
790+   
791+    First, it tries shutil.move (or os.rename when shutil is unavailable;
792+    os.rename breaks when the move crosses filesystems). If that raises
793+    OSError, it falls back to streaming the contents manually from one
794+    file to another in Python and then removing the original.
795+
796+    Without ``allow_overwrite``, if the destination file exists, an
797+    IOError is raised.
798+    """
799+
800+    from django.utils import file_locks
801+
802+    if old_file_name == new_file_name:
803+        # No file moving takes place.
804+        return
805+
806+    if not allow_overwrite and os.path.exists(new_file_name):
807+        raise IOError, "Django does not allow overwriting files."
808+
809+    try:
810+        file_move(old_file_name, new_file_name)
811+        return
812+    except OSError: # moving to another filesystem
813+        pass
814+
815+    new_file = open(new_file_name, 'wb')
816+    # exclusive lock
817+    file_locks.lock(new_file, file_locks.LOCK_EX)
818+    old_file = open(old_file_name, 'rb')
819+    current_chunk = None
820+
821+    while current_chunk != '':
822+        current_chunk = old_file.read(chunk_size)
823+        new_file.write(current_chunk)
824+
825+    new_file.close()
826+    old_file.close()
827+
828+    os.remove(old_file_name)
829+
830Index: tests/modeltests/test_client/views.py
831===================================================================
832--- tests/modeltests/test_client/views.py       (revision 5343)
833+++ tests/modeltests/test_client/views.py       (working copy)
834@@ -46,6 +46,12 @@
835 
836     return HttpResponse(t.render(c))
837 
838+def post_file_view(request):
839+    "A view that expects a multipart post and returns a file in the context"
840+    t = Template('File {{ file.filename }} received', name='POST Template')
841+    c = Context({'file': request.FILES['file_file']})
842+    return HttpResponse(t.render(c))
843+
844 def redirect_view(request):
845     "A view that redirects all requests to the GET view"
846     return HttpResponseRedirect('/test_client/get_view/')
847Index: tests/modeltests/test_client/models.py
848===================================================================
849--- tests/modeltests/test_client/models.py      (revision 5343)
850+++ tests/modeltests/test_client/models.py      (working copy)
851@@ -3,7 +3,7 @@
852 
853 The test client is a class that can act like a simple
854 browser for testing purposes.
855
856+
857 It allows the user to compose GET and POST requests, and
858 obtain the response that the server gave to those requests.
859 The server Response objects are annotated with the details
860@@ -76,6 +76,20 @@
861         self.assertEqual(response.template.name, "Book template")
862         self.assertEqual(response.content, "Blink - Malcolm Gladwell")
863 
864+    def test_post_file_view(self):
865+        "POST this python file to a view"
866+        import os, tempfile
867+        from django.conf import settings
868+        file = __file__.replace('.pyc', '.py')
869+        for upload_dir in [None, tempfile.gettempdir()]:
870+            settings.FILE_UPLOAD_DIR = upload_dir
871+            post_data = { 'name': file, 'file': open(file) }
872+            response = self.client.post('/test_client/post_file_view/', post_data)
873+            self.failUnless('models.py' in response.context['file']['filename'])
874+            self.failUnless(len(response.context['file']['content']) == os.path.getsize(file))
875+            if upload_dir:
876+                self.failUnless(response.context['file']['tmpfilename'])
877+
878     def test_redirect(self):
879         "GET a URL that redirects elsewhere"
880         response = self.client.get('/test_client/redirect_view/')
881Index: tests/modeltests/test_client/urls.py
882===================================================================
883--- tests/modeltests/test_client/urls.py        (revision 5343)
884+++ tests/modeltests/test_client/urls.py        (working copy)
885@@ -5,6 +5,7 @@
886 urlpatterns = patterns('',
887     (r'^get_view/$', views.get_view),
888     (r'^post_view/$', views.post_view),
889+    (r'^post_file_view/$', views.post_file_view),
890     (r'^raw_post_view/$', views.raw_post_view),
891     (r'^redirect_view/$', views.redirect_view),
892     (r'^permanent_redirect_view/$', redirect_to, { 'url': '/test_client/get_view/' }),
893Index: docs/request_response.txt
894===================================================================
895--- docs/request_response.txt   (revision 5343)
896+++ docs/request_response.txt   (working copy)
897@@ -72,13 +72,25 @@
898 ``FILES``
899     A dictionary-like object containing all uploaded files. Each key in
900     ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
901-    value in ``FILES`` is a standard Python dictionary with the following three
902+    value in ``FILES`` is a standard Python dictionary with the following four
903     keys:
904 
905         * ``filename`` -- The name of the uploaded file, as a Python string.
906         * ``content-type`` -- The content type of the uploaded file.
907         * ``content`` -- The raw content of the uploaded file.
908+        * ``content-length`` -- The length of the content in bytes.
909 
910+    If streaming file uploads are enabled, two additional keys
911+    describing the uploaded file will be present:
912+
913+       * ``tmpfilename`` -- The filename for the temporary file.
914+       * ``tmpfile`` -- An open file object for the temporary file.
915+
916+    The temporary file will be removed when the request finishes.
917+
918+    Note that accessing ``content`` when streaming uploads are enabled
919+    will read the whole file into memory, which may not be what you want.
920+
921     Note that ``FILES`` will only contain data if the request method was POST
922     and the ``<form>`` that posted to the request had
923     ``enctype="multipart/form-data"``. Otherwise, ``FILES`` will be a blank
924Index: docs/settings.txt
925===================================================================
926--- docs/settings.txt   (revision 5343)
927+++ docs/settings.txt   (working copy)
928@@ -448,6 +448,15 @@
929 
930 .. _Testing Django Applications: ../testing/
931 
932+FILE_UPLOAD_DIR
933+---------------
934+
935+Default: ``None``
936+
937+Path to a directory where temporary files should be written during
938+file uploads. Leaving this as ``None`` will disable streaming file uploads,
939+and cause all uploaded files to be stored (temporarily) in memory.
940+
941 IGNORABLE_404_ENDS
942 ------------------
943 
944@@ -764,6 +773,16 @@
945 
946 .. _site framework docs: ../sites/
947 
948+STREAMING_MIN_POST_SIZE
949+-----------------------
950+
951+Default: 524288 (``512*1024``)
952+
953+An integer specifying the minimum number of bytes that has to be
954+received (in a POST) for file upload streaming to take place. Any
955+request smaller than this will be handled in memory.
956+Note: ``FILE_UPLOAD_DIR`` has to be defined to enable streaming.
957+
958 TEMPLATE_CONTEXT_PROCESSORS
959 ---------------------------
960 
961Index: docs/forms.txt
962===================================================================
963--- docs/forms.txt      (revision 5343)
964+++ docs/forms.txt      (working copy)
965@@ -475,6 +475,19 @@
966    new_data = request.POST.copy()
967    new_data.update(request.FILES)
968 
969+Streaming file uploads
970+-----------------------
971+
972+File uploads will be read into memory by default. This works fine for
973+small to medium-sized uploads (from 1MB to 100MB depending on your
974+setup and usage). If you want to support larger uploads, you can enable
975+upload streaming, where only a small part of the file will be in memory
976+at any time. To do this, you need to specify the ``FILE_UPLOAD_DIR``
977+setting (see the settings_ document for more details).
978+
979+See `request object`_ for more details about ``request.FILES`` objects
980+with streaming file uploads enabled.
981+
982 Validators
983 ==========
984 
985@@ -698,3 +711,4 @@
986 .. _`generic views`: ../generic_views/
987 .. _`models API`: ../model-api/
988 .. _settings: ../settings/
989+.. _request object: ../request_response/#httprequest-objects
Back to Top