Code

Ticket #2070: 6654_fixed_tests_and_file_clean.diff

File 6654_fixed_tests_and_file_clean.diff, 45.0 KB (added by Øyvind Saltvik <oyvind@…>, 6 years ago)

diff did not apply cleanly, fixed

Line 
1Index: django/http/multipartparser.py
2===================================================================
3--- django/http/multipartparser.py      (revision 0)
4+++ django/http/multipartparser.py      (revision 0)
5@@ -0,0 +1,328 @@
6+"""
7+MultiPart parsing for file uploads.
8+If a progress id is sent (either through the ``X-Progress-ID``
9+header or a ``progress_id`` GET parameter) and ``FILE_UPLOAD_DIR`` is set
10+in the settings, then the file progress will be tracked using
11+``request.file_progress``.
12+
13+To use this feature, consider creating a middleware with an appropriate
14+``process_request``::
15+
16+    class FileProgressTrack(object):
17+        def __get__(self, request, HttpRequest):
18+            progress_id = request.META['UPLOAD_PROGRESS_ID']
19+            status = None  # placeholder: look up the progress for progress_id here
20+
21+            return status
22+
23+        def __set__(self, request, new_value):
24+            progress_id = request.META['UPLOAD_PROGRESS_ID']
25+
26+            # set the progress using progress_id here.
27+
28+    # example middleware
29+    class FileProgressExample(object):
30+        def process_request(self, request):
31+            request.__class__.file_progress = FileProgressTrack()
32+
33+
34+
35+"""
36+
37+__all__ = ['MultiPartParserError','MultiPartParser']
38+
39+
40+from django.utils.datastructures import MultiValueDict
41+from django.http.utils import str_to_unicode
42+from django.conf import settings
43+import os
44+
45+try:
46+    from cStringIO import StringIO
47+except ImportError:
48+    from StringIO import StringIO
49+
50+
51+class MultiPartParserError(Exception):
52+    def __init__(self, message):
53+        self.message = message
54+    def __str__(self):
55+        return repr(self.message)
56+
57+class MultiPartParser(object):
58+    """
59+    An RFC 2388 multipart/form-data parser.
60+   
61+    parse() reads the input stream in chunk_size chunks and returns a
62+    tuple of (POST MultiValueDict, FILES MultiValueDict). If
63+    file_upload_dir is defined files will be streamed to temporary
64+    files in the specified directory.
65+
66+    The FILES dictionary will have 'filename', 'content-type',
67+    'content' and 'content-length' entries. For streamed files it will
68+    also have 'tmpfilename' and 'tmpfile'. The 'content' entry will
69+    only be read from disk when referenced for streamed files.
70+
71+    If the X-Progress-ID is sent (in one of many formats), then
72+    request.file_progress will be given a dictionary of the progress.
73+    """
74+    def __init__(self, headers, input, request, file_upload_dir=None, streaming_min_post_size=None, chunk_size=1024*64):
75+        try:
76+            content_length = int(headers['Content-Length'])
77+        except:
78+            raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length'))
79+
80+        content_type = headers.get('Content-Type')
81+
82+        if not content_type or not content_type.startswith('multipart/'):
83+            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
84+           
85+        ctype, opts = self.parse_header(content_type)
86+        boundary = opts.get('boundary')
87+        from cgi import valid_boundary
88+        if not boundary or not valid_boundary(boundary):
89+            raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary)
90+
91+        progress_id = request.META['UPLOAD_PROGRESS_ID']
92+
93+        self._track_progress = file_upload_dir and progress_id # whether or not to track progress
94+        self._boundary = '--' + boundary
95+        self._input = input
96+        self._size = content_length
97+        self._received = 0
98+        self._file_upload_dir = file_upload_dir
99+        self._chunk_size = chunk_size
100+        self._state = 'PREAMBLE'
101+        self._partial = ''
102+        self._post = MultiValueDict()
103+        self._files = MultiValueDict()
104+        self._request = request
105+        self._encoding = request.encoding or settings.DEFAULT_CHARSET
106+
107+        if streaming_min_post_size is not None and content_length < streaming_min_post_size:
108+            self._file_upload_dir = None # disable file streaming for small request
109+        elif self._track_progress:
110+            request.file_progress = {'state': 'starting'}
111+
112+        try:
113+            # Use mx fast string search if available.
114+            from mx.TextTools import FS
115+            self._fs = FS(self._boundary)
116+        except ImportError:
117+            self._fs = None
118+
119+    def parse(self):
120+        try:
121+            self._parse()
122+        finally:
123+            if self._track_progress:
124+                self._request.file_progress = {'state': 'done'}
125+        return self._post, self._files
126+
127+    def _parse(self):
128+        size = self._size
129+
130+        try:
131+            while size > 0:
132+                n = self._read(self._input, min(self._chunk_size, size))
133+                if not n:
134+                    break
135+                size -= n
136+        except:
137+            # consume any remaining data so we don't generate a "Connection Reset" error
138+            size = self._size - self._received
139+            while size > 0:
140+                data = self._input.read(min(self._chunk_size, size))
141+                size -= len(data)
142+            raise
143+
144+    def _find_boundary(self, data, start, stop):
145+        """
146+        Find the next boundary and return the end of current part
147+        and start of next part.
148+        """
149+        if self._fs:
150+            boundary = self._fs.find(data, start, stop)
151+        else:
152+            boundary = data.find(self._boundary, start, stop)
153+        if boundary >= 0:
154+            end = boundary
155+            next = boundary + len(self._boundary)
156+
157+            # backup over CRLF
158+            if end > 0 and data[end-1] == '\n': end -= 1
159+            if end > 0 and data[end-1] == '\r': end -= 1
160+            # skip over --CRLF
161+            if next < stop and data[next] == '-': next += 1
162+            if next < stop and data[next] == '-': next += 1
163+            if next < stop and data[next] == '\r': next += 1
164+            if next < stop and data[next] == '\n': next += 1
165+
166+            return True, end, next
167+        else:
168+            return False, stop, stop
169+
170+    class TemporaryFile(object):
171+        "A temporary file that tries to delete itself when garbage collected."
172+        def __init__(self, dir):
173+            import tempfile
174+            (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
175+            self.file = os.fdopen(fd, 'w+b')
176+            self.name = name
177+
178+        def __getattr__(self, name):
179+            a = getattr(self.__dict__['file'], name)
180+            if type(a) != type(0):
181+                setattr(self, name, a)
182+            return a
183+
184+        def __del__(self):
185+            try:
186+                os.unlink(self.name)
187+            except OSError:
188+                pass
189+
190+    class LazyContent(dict):
191+        """
192+        A lazy FILES dictionary entry that reads the contents from
193+        tmpfile only when referenced.
194+        """
195+        def __init__(self, data):
196+            dict.__init__(self, data)
197+
198+        def __getitem__(self, key):
199+            if key == 'content' and not self.has_key(key):
200+                self['tmpfile'].seek(0)
201+                self['content'] = self['tmpfile'].read()
202+            return dict.__getitem__(self, key)
203+
204+    def _read(self, input, size):
205+        data = input.read(size)
206+
207+        if not data:
208+            return 0
209+
210+        read_size = len(data)
211+        self._received += read_size
212+
213+        if self._partial:
214+            data = self._partial + data
215+
216+        start = 0
217+        stop = len(data)
218+
219+        while start < stop:
220+            boundary, end, next = self._find_boundary(data, start, stop)
221+
222+            if not boundary and read_size:
223+                # make sure we don't treat a partial boundary (and its separators) as data
224+                stop -= len(self._boundary) + 16
225+                end = next = stop
226+                if end <= start:
227+                    break # need more data
228+
229+            if self._state == 'PREAMBLE':
230+                # Preamble, just ignore it
231+                self._state = 'HEADER'
232+
233+            elif self._state == 'HEADER':
234+                # Beginning of header, look for end of header and parse it if found.
235+
236+                header_end = data.find('\r\n\r\n', start, stop)
237+                if header_end == -1:
238+                    break # need more data
239+
240+                header = data[start:header_end]
241+
242+                self._fieldname = None
243+                self._filename = None
244+                self._content_type = None
245+
246+                for line in header.split('\r\n'):
247+                    ctype, opts = self.parse_header(line)
248+                    if ctype == 'content-disposition: form-data':
249+                        self._fieldname = opts.get('name')
250+                        self._filename = opts.get('filename')
251+                    elif ctype.startswith('content-type: '):
252+                        self._content_type = ctype[14:]
253+
254+                if self._filename is not None:
255+                    # cleanup filename from IE full paths:
256+                    self._filename = self._filename[self._filename.rfind("\\")+1:].strip()
257+
258+                    if self._filename: # ignore files without filenames
259+                        if self._file_upload_dir:
260+                            try:
261+                                self._file = self.TemporaryFile(dir=self._file_upload_dir)
262+                            except (OSError, IOError), e:
263+                                raise MultiPartParserError("Failed to create temporary file. Error was %s" % e)
264+                        else:
265+                            self._file = StringIO()
266+                    else:
267+                        self._file = None
268+                    self._filesize = 0
269+                    self._state = 'FILE'
270+                else:
271+                    self._field = StringIO()
272+                    self._state = 'FIELD'
273+                next = header_end + 4
274+
275+            elif self._state == 'FIELD':
276+                # In a field, collect data until a boundary is found.
277+
278+                self._field.write(data[start:end])
279+                if boundary:
280+                    if self._fieldname:
281+                        self._post.appendlist(self._fieldname, str_to_unicode(self._field.getvalue(), self._encoding))
282+                    self._field.close()
283+                    self._state = 'HEADER'
284+
285+            elif self._state == 'FILE':
286+                # In a file, collect data until a boundary is found.
287+
288+                if self._file:
289+                    try:
290+                        self._file.write(data[start:end])
291+                    except IOError, e:
292+                        raise MultiPartParserError("Failed to write to temporary file.")
293+                    self._filesize += end-start
294+
295+                    if self._track_progress:
296+                        self._request.file_progress = {'received': self._received,
297+                                                       'size':     self._size,
298+                                                       'state':    'uploading'}
299+
300+                if boundary:
301+                    if self._file:
302+                        if self._file_upload_dir:
303+                            self._file.seek(0)
304+                            file = self.LazyContent({
305+                                'filename': str_to_unicode(self._filename, self._encoding),
306+                                'content-type':  self._content_type,
307+                                # 'content': is read on demand
308+                                'content-length': self._filesize,
309+                                'tmpfilename': self._file.name,
310+                                'tmpfile': self._file
311+                            })
312+                        else:
313+                            file = {
314+                                'filename': str_to_unicode(self._filename, self._encoding),
315+                                'content-type':  self._content_type,
316+                                'content': self._file.getvalue(),
317+                                'content-length': self._filesize
318+                            }
319+                            self._file.close()
320+
321+                        self._files.appendlist(self._fieldname, file)
322+
323+                    self._state = 'HEADER'
324+
325+            start = next
326+
327+        self._partial = data[start:]
328+
329+        return read_size
330+
331+    def parse_header(self, line):
332+        from cgi import parse_header
333+        return parse_header(line)
334Index: django/http/__init__.py
335===================================================================
336--- django/http/__init__.py     (revision 6654)
337+++ django/http/__init__.py     (working copy)
338@@ -1,11 +1,16 @@
339 import os
340+import re
341 from Cookie import SimpleCookie
342 from pprint import pformat
343 from urllib import urlencode
344 from urlparse import urljoin
345+from django.http.utils import str_to_unicode
346+from django.http.multipartparser import MultiPartParser, MultiPartParserError
347 from django.utils.datastructures import MultiValueDict, FileDict
348 from django.utils.encoding import smart_str, iri_to_uri, force_unicode
349 
350+upload_id_re = re.compile(r'^[a-fA-F0-9]{32}$') # file progress id Regular expression
351+
352 RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
353 
354 try:
355@@ -78,7 +83,7 @@
356 
357     def is_secure(self):
358         return os.environ.get("HTTPS") == "on"
359-
360+       
361     def _set_encoding(self, val):
362         """
363         Sets the encoding used for GET/POST accesses. If the GET or POST
364@@ -96,39 +101,55 @@
365 
366     encoding = property(_get_encoding, _set_encoding)
367 
368-def parse_file_upload(header_dict, post_data):
369-    "Returns a tuple of (POST QueryDict, FILES MultiValueDict)"
370-    import email, email.Message
371-    from cgi import parse_header
372-    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
373-    raw_message += '\r\n\r\n' + post_data
374-    msg = email.message_from_string(raw_message)
375-    POST = QueryDict('', mutable=True)
376-    FILES = MultiValueDict()
377-    for submessage in msg.get_payload():
378-        if submessage and isinstance(submessage, email.Message.Message):
379-            name_dict = parse_header(submessage['Content-Disposition'])[1]
380-            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
381-            # or {'name': 'blah'} for POST fields
382-            # We assume all uploaded files have a 'filename' set.
383-            if 'filename' in name_dict:
384-                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
385-                if not name_dict['filename'].strip():
386-                    continue
387-                # IE submits the full path, so trim everything but the basename.
388-                # (We can't use os.path.basename because that uses the server's
389-                # directory separator, which may not be the same as the
390-                # client's one.)
391-                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
392-                FILES.appendlist(name_dict['name'], FileDict({
393-                    'filename': filename,
394-                    'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
395-                    'content': submessage.get_payload(),
396-                }))
397-            else:
398-                POST.appendlist(name_dict['name'], submessage.get_payload())
399-    return POST, FILES
400+    def _get_file_progress(self):
401+        return {}
402+   
403+    def _set_file_progress(self,value):
404+        pass
405 
406+    def _del_file_progress(self):
407+        pass
408+
409+    file_progress = property(_get_file_progress,
410+                             _set_file_progress,
411+                             _del_file_progress)
412+
413+    def _get_file_progress_from_args(self, headers, get, querystring):
414+        """
415+        This parses the request for a file progress_id value.
416+        Note that there are two distinct ways of getting the progress
417+        ID -- header and GET. One is used primarily to attach via JavaScript
418+        to the end of an HTML form action while the other is used for AJAX
419+        communication.
420+
421+        All progress IDs must be valid 32-digit hexadecimal numbers.
422+        """
423+        if 'X-Upload-ID' in headers:
424+            progress_id = headers['X-Upload-ID']
425+        elif 'progress_id' in get:
426+            progress_id = get['progress_id']
427+        else:
428+            return None
429+
430+        if not upload_id_re.match(progress_id):
431+            return None
432+
433+        return progress_id
434+
435+def parse_file_upload(headers, input, request):
436+    from django.conf import settings
437+
438+    # Only stream files to disk if FILE_UPLOAD_DIR is set
439+    file_upload_dir = settings.FILE_UPLOAD_DIR
440+    streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE
441+
442+    try:
443+        parser = MultiPartParser(headers, input, request, file_upload_dir, streaming_min_post_size)
444+        return parser.parse()
445+    except MultiPartParserError, e:
446+        return MultiValueDict({ '_file_upload_error': [e.message] }), {}
447+
448+
449 class QueryDict(MultiValueDict):
450     """
451     A specialized MultiValueDict that takes a query string when initialized.
452@@ -399,20 +420,3 @@
453 # A backwards compatible alias for HttpRequest.get_host.
454 def get_host(request):
455     return request.get_host()
456-
457-# It's neither necessary nor appropriate to use
458-# django.utils.encoding.smart_unicode for parsing URLs and form inputs. Thus,
459-# this slightly more restricted function.
460-def str_to_unicode(s, encoding):
461-    """
462-    Convert basestring objects to unicode, using the given encoding. Illegaly
463-    encoded input characters are replaced with Unicode "unknown" codepoint
464-    (\ufffd).
465-
466-    Returns any non-basestring objects without change.
467-    """
468-    if isinstance(s, str):
469-        return unicode(s, encoding, 'replace')
470-    else:
471-        return s
472-
473Index: django/http/utils.py
474===================================================================
475--- django/http/utils.py        (revision 0)
476+++ django/http/utils.py        (revision 0)
477@@ -0,0 +1,16 @@
478+# It's neither necessary nor appropriate to use
479+# django.utils.encoding.smart_unicode for parsing URLs and form inputs. Thus,
480+# this slightly more restricted function.
481+def str_to_unicode(s, encoding):
482+    """
483+    Convert basestring objects to unicode, using the given encoding. Illegally
484+    encoded input characters are replaced with Unicode "unknown" codepoint
485+    (\ufffd).
486+
487+    Returns any non-basestring objects without change.
488+    """
489+    if isinstance(s, str):
490+        return unicode(s, encoding, 'replace')
491+    else:
492+        return s
493+
494Index: django/oldforms/__init__.py
495===================================================================
496--- django/oldforms/__init__.py (revision 6654)
497+++ django/oldforms/__init__.py (working copy)
498@@ -680,17 +680,22 @@
499         self.validator_list = [self.isNonEmptyFile] + validator_list
500 
501     def isNonEmptyFile(self, field_data, all_data):
502-        try:
503-            content = field_data['content']
504-        except TypeError:
505+        if field_data.has_key('_file_upload_error'):
506+            raise validators.CriticalValidationError, field_data['_file_upload_error']
507+        if not field_data.has_key('filename'):
508             raise validators.CriticalValidationError, ugettext("No file was submitted. Check the encoding type on the form.")
509-        if not content:
510+        if not field_data['content-length']:
511             raise validators.CriticalValidationError, ugettext("The submitted file is empty.")
512 
513     def render(self, data):
514         return u'<input type="file" id="%s" class="v%s" name="%s" />' % \
515             (self.get_id(), self.__class__.__name__, self.field_name)
516 
517+    def prepare(self, new_data):
518+        if new_data.has_key('_file_upload_error'):
519+            # pretend we got something in the field to raise a validation error later
520+            new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] }
521+
522     def html2python(data):
523         if data is None:
524             raise EmptyValue
525Index: django/db/models/base.py
526===================================================================
527--- django/db/models/base.py    (revision 6654)
528+++ django/db/models/base.py    (working copy)
529@@ -12,6 +12,7 @@
530 from django.dispatch import dispatcher
531 from django.utils.datastructures import SortedDict
532 from django.utils.functional import curry
533+from django.utils.file import file_move_safe
534 from django.utils.encoding import smart_str, force_unicode, smart_unicode
535 from django.conf import settings
536 from itertools import izip
537@@ -379,12 +380,16 @@
538     def _get_FIELD_size(self, field):
539         return os.path.getsize(self._get_FIELD_filename(field))
540 
541-    def _save_FIELD_file(self, field, filename, raw_contents, save=True):
542+    def _save_FIELD_file(self, field, filename, raw_field, save=True):
543         directory = field.get_directory_name()
544         try: # Create the date-based directory if it doesn't exist.
545             os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
546         except OSError: # Directory probably already exists.
547             pass
548+
549+        if filename is None:
550+            filename = raw_field['filename']
551+
552         filename = field.get_filename(filename)
553 
554         # If the filename already exists, keep adding an underscore to the name of
555@@ -401,9 +406,16 @@
556         setattr(self, field.attname, filename)
557 
558         full_filename = self._get_FIELD_filename(field)
559-        fp = open(full_filename, 'wb')
560-        fp.write(raw_contents)
561-        fp.close()
562+        if raw_field.has_key('tmpfilename'):
563+            raw_field['tmpfile'].close()
564+            file_move_safe(raw_field['tmpfilename'], full_filename)
565+        else:
566+            from django.utils import file_locks
567+            fp = open(full_filename, 'wb')
568+            # exclusive lock
569+            file_locks.lock(fp, file_locks.LOCK_EX)
570+            fp.write(raw_field['content'])
571+            fp.close()
572 
573         # Save the width and/or height, if applicable.
574         if isinstance(field, ImageField) and (field.width_field or field.height_field):
575Index: django/db/models/fields/__init__.py
576===================================================================
577--- django/db/models/fields/__init__.py (revision 6654)
578+++ django/db/models/fields/__init__.py (working copy)
579@@ -761,7 +761,8 @@
580         setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
581         setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
582         setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
583-        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
584+        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
585+        setattr(cls, 'move_%s_file' % self.name, lambda instance, raw_field, save=True: instance._save_FIELD_file(self, None, raw_field, save))       
586         dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
587 
588     def delete_file(self, instance):
589@@ -784,9 +785,9 @@
590         if new_data.get(upload_field_name, False):
591             func = getattr(new_object, 'save_%s_file' % self.name)
592             if rel:
593-                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
594+                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save)
595             else:
596-                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
597+                func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save)
598 
599     def get_directory_name(self):
600         return os.path.normpath(force_unicode(datetime.datetime.now().strftime(smart_str(self.upload_to))))
601@@ -798,7 +799,7 @@
602 
603     def save_form_data(self, instance, data):
604         if data:
605-            getattr(instance, "save_%s_file" % self.name)(data.filename, data.content, save=False)
606+            getattr(instance, "save_%s_file" % self.name)(data.filename, data.data, save=False)
607 
608     def formfield(self, **kwargs):
609         defaults = {'form_class': forms.FileField}
610Index: django/conf/global_settings.py
611===================================================================
612--- django/conf/global_settings.py      (revision 6654)
613+++ django/conf/global_settings.py      (working copy)
614@@ -251,6 +251,16 @@
615 from django import get_version
616 URL_VALIDATOR_USER_AGENT = "Django/%s (http://www.djangoproject.com)" % get_version()
617 
618+# The directory to place streamed file uploads. The web server needs write
619+# permissions on this directory.
620+# If this is None, streaming uploads are disabled.
621+FILE_UPLOAD_DIR = None
622+
623+# The minimum size of a POST before file uploads are streamed to disk.
624+# POSTs smaller than this are held in memory instead of being streamed.
625+# Size is in bytes.
626+STREAMING_MIN_POST_SIZE = 512 * (2**10)
627+
628 ##############
629 # MIDDLEWARE #
630 ##############
631Index: django/core/validators.py
632===================================================================
633--- django/core/validators.py   (revision 6654)
634+++ django/core/validators.py   (working copy)
635@@ -177,17 +177,17 @@
636     from PIL import Image
637     from cStringIO import StringIO
638     try:
639-        content = field_data['content']
640+        filename = field_data['filename']
641     except TypeError:
642         raise ValidationError, _("No file was submitted. Check the encoding type on the form.")
643     try:
644         # load() is the only method that can spot a truncated JPEG,
645         #  but it cannot be called sanely after verify()
646-        trial_image = Image.open(StringIO(content))
647+        trial_image = Image.open(field_data.get('tmpfilename') or StringIO(field_data.get('content','')))
648         trial_image.load()
649         # verify() is the only method that can spot a corrupt PNG,
650         #  but it must be called immediately after the constructor
651-        trial_image = Image.open(StringIO(content))
652+        trial_image = Image.open(field_data.get('tmpfilename') or StringIO(field_data.get('content','')))
653         trial_image.verify()
654     except Exception: # Python Imaging Library doesn't recognize it as an image
655         raise ValidationError, _("Upload a valid image. The file you uploaded was either not an image or a corrupted image.")
656Index: django/core/handlers/wsgi.py
657===================================================================
658--- django/core/handlers/wsgi.py        (revision 6654)
659+++ django/core/handlers/wsgi.py        (working copy)
660@@ -77,6 +77,7 @@
661         self.environ = environ
662         self.path = force_unicode(environ['PATH_INFO'])
663         self.META = environ
664+        self.META['UPLOAD_PROGRESS_ID'] = self._get_file_progress_id()
665         self.method = environ['REQUEST_METHOD'].upper()
666 
667     def __repr__(self):
668@@ -114,7 +115,14 @@
669             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
670                 header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
671                 header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
672-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
673+                header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '')
674+                header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '')
675+                try:
676+                    self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'], self)
677+                except:
678+                    self._post, self._files = {}, {} # make sure we dont read the input stream again
679+                    raise
680+                self._raw_post_data = None # raw data is not available for streamed multipart messages
681             else:
682                 self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
683         else:
684@@ -172,6 +180,17 @@
685             buf.close()
686             return self._raw_post_data
687 
688+    def _get_file_progress_id(self):
689+        """
690+        Returns the Progress ID of the request,
691+        usually provided if there is a file upload
692+        going on.
693+        Returns ``None`` if no progress ID is specified.
694+        """
695+        return self._get_file_progress_from_args(self.environ,
696+                                                 self.GET,
697+                                                 self.environ.get('QUERY_STRING', ''))
698+
699     GET = property(_get_get, _set_get)
700     POST = property(_get_post, _set_post)
701     COOKIES = property(_get_cookies, _set_cookies)
702Index: django/core/handlers/modpython.py
703===================================================================
704--- django/core/handlers/modpython.py   (revision 6654)
705+++ django/core/handlers/modpython.py   (working copy)
706@@ -51,7 +51,12 @@
707     def _load_post_and_files(self):
708         "Populates self._post and self._files"
709         if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
710-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
711+            self._raw_post_data = None # raw data is not available for streamed multipart messages
712+            try:
713+                self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req, self)
714+            except:
715+                self._post, self._files = {}, {} # make sure we dont read the input stream again
716+                raise
717         else:
718             self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
719 
720@@ -96,20 +101,21 @@
721                 'AUTH_TYPE':         self._req.ap_auth_type,
722                 'CONTENT_LENGTH':    self._req.clength, # This may be wrong
723                 'CONTENT_TYPE':      self._req.content_type, # This may be wrong
724-                'GATEWAY_INTERFACE': 'CGI/1.1',
725-                'PATH_INFO':         self._req.path_info,
726-                'PATH_TRANSLATED':   None, # Not supported
727-                'QUERY_STRING':      self._req.args,
728-                'REMOTE_ADDR':       self._req.connection.remote_ip,
729-                'REMOTE_HOST':       None, # DNS lookups not supported
730-                'REMOTE_IDENT':      self._req.connection.remote_logname,
731-                'REMOTE_USER':       self._req.user,
732-                'REQUEST_METHOD':    self._req.method,
733-                'SCRIPT_NAME':       None, # Not supported
734-                'SERVER_NAME':       self._req.server.server_hostname,
735-                'SERVER_PORT':       self._req.server.port,
736-                'SERVER_PROTOCOL':   self._req.protocol,
737-                'SERVER_SOFTWARE':   'mod_python'
738+                'GATEWAY_INTERFACE':  'CGI/1.1',
739+                'PATH_INFO':          self._req.path_info,
740+                'PATH_TRANSLATED':    None, # Not supported
741+                'QUERY_STRING':       self._req.args,
742+                'REMOTE_ADDR':        self._req.connection.remote_ip,
743+                'REMOTE_HOST':        None, # DNS lookups not supported
744+                'REMOTE_IDENT':       self._req.connection.remote_logname,
745+                'REMOTE_USER':        self._req.user,
746+                'REQUEST_METHOD':     self._req.method,
747+                'SCRIPT_NAME':        None, # Not supported
748+                'SERVER_NAME':        self._req.server.server_hostname,
749+                'SERVER_PORT':        self._req.server.port,
750+                'SERVER_PROTOCOL':    self._req.protocol,
751+                'UPLOAD_PROGRESS_ID': self._get_file_progress_id(),
752+                'SERVER_SOFTWARE':    'mod_python'
753             }
754             for key, value in self._req.headers_in.items():
755                 key = 'HTTP_' + key.upper().replace('-', '_')
756@@ -126,6 +132,17 @@
757     def _get_method(self):
758         return self.META['REQUEST_METHOD'].upper()
759 
760+    def _get_file_progress_id(self):
761+        """
762+        Returns the Progress ID of the request,
763+        usually provided if there is a file upload
764+        going on.
765+        Returns ``None`` if no progress ID is specified.
766+        """
767+        return self._get_file_progress_from_args(self._req.headers_in,
768+                                                 self.GET,
769+                                                 self._req.args)
770+
771     GET = property(_get_get, _set_get)
772     POST = property(_get_post, _set_post)
773     COOKIES = property(_get_cookies, _set_cookies)
774Index: django/newforms/fields.py
775===================================================================
776--- django/newforms/fields.py   (revision 6654)
777+++ django/newforms/fields.py   (working copy)
778@@ -421,9 +421,9 @@
779 
780 class UploadedFile(StrAndUnicode):
781     "A wrapper for files uploaded in a FileField"
782-    def __init__(self, filename, content):
783+    def __init__(self, filename, data):
784         self.filename = filename
785-        self.content = content
786+        self.data = data
787 
788     def __unicode__(self):
789         """
790@@ -448,12 +448,12 @@
791         if not self.required and data in EMPTY_VALUES:
792             return None
793         try:
794-            f = UploadedFile(data['filename'], data['content'])
795+            f = UploadedFile(data['filename'], data)
796         except TypeError:
797             raise ValidationError(self.error_messages['invalid'])
798         except KeyError:
799             raise ValidationError(self.error_messages['missing'])
800-        if not f.content:
801+        if not f.data.get('content-length'):
802             raise ValidationError(self.error_messages['empty'])
803         return f
804 
805@@ -475,11 +475,11 @@
806         try:
807             # load() is the only method that can spot a truncated JPEG,
808             #  but it cannot be called sanely after verify()
809-            trial_image = Image.open(StringIO(f.content))
810+            trial_image = Image.open(f.data.get('tmpfilename') or StringIO(f.data['content']))
811             trial_image.load()
812             # verify() is the only method that can spot a corrupt PNG,
813             #  but it must be called immediately after the constructor
814-            trial_image = Image.open(StringIO(f.content))
815+            trial_image = Image.open(f.data.get('tmpfilename') or StringIO(f.data['content']))
816             trial_image.verify()
817         except Exception: # Python Imaging Library doesn't recognize it as an image
818             raise ValidationError(self.error_messages['invalid_image'])
819Index: django/utils/file_locks.py
820===================================================================
821--- django/utils/file_locks.py  (revision 0)
822+++ django/utils/file_locks.py  (revision 0)
823@@ -0,0 +1,50 @@
824+"""
825+Locking portability by Jonathan Feinberg <jdf@pobox.com>, from the Python Cookbook
826+
827+Example Usage::
828+
829+    from django.utils import file_locks
830+
831+    f = open('./file', 'wb')
832+
833+    file_locks.lock(f, file_locks.LOCK_EX)
834+    f.write('Django')
835+    f.close()
836+"""
837+
838+
839+import os
840+
841+__all__ = ['LOCK_EX','LOCK_SH','LOCK_NB','lock','unlock']
842+
843+if os.name == 'nt':
844+       import win32con
845+       import win32file
846+       import pywintypes
847+       LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
848+       LOCK_SH = 0
849+       LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
850+       __overlapped = pywintypes.OVERLAPPED()
851+elif os.name == 'posix':
852+       import fcntl
853+       LOCK_EX = fcntl.LOCK_EX
854+       LOCK_SH = fcntl.LOCK_SH
855+       LOCK_NB = fcntl.LOCK_NB
856+else:
857+       raise RuntimeError("Locking only defined for nt and posix platforms")
858+
859+if os.name == 'nt':
860+       def lock(file, flags):
861+               hfile = win32file._get_osfhandle(file.fileno())
862+               win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped)
863+
864+       def unlock(file):
865+               hfile = win32file._get_osfhandle(file.fileno())
866+               win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped)
867+
868+elif os.name =='posix':
869+       def lock(file, flags):
870+               fcntl.flock(file.fileno(), flags)
871+
872+       def unlock(file):
873+               fcntl.flock(file.fileno(), fcntl.LOCK_UN)
874Index: django/utils/file.py
875===================================================================
876--- django/utils/file.py        (revision 0)
877+++ django/utils/file.py        (revision 0)
878@@ -0,0 +1,53 @@
879+import os
880+
881+__all__ = ['file_move_safe']
882+
883+try:
884+    import shutil
885+    file_move = shutil.move
886+except ImportError:
887+    file_move = os.rename
888+
889+def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64, allow_overwrite=False):
890+    """
891+    Moves a file from one location to another in the safest way possible.
892+   
893+    First, it tries using shutil.move, which is OS-dependent but doesn't
894+    break with change of filesystems. Then it tries os.rename, which will
895+    break if it encounters a change in filesystems. Lastly, it streams
896+    it manually from one file to another in python.
897+
898+    If ``allow_overwrite`` is false and the destination file already
899+    exists, an IOError is raised.
900+    """
901+
902+    from django.utils import file_locks
903+
904+    if old_file_name == new_file_name:
905+        # No file moving takes place.
906+        return
907+
908+    if not allow_overwrite and os.path.exists(new_file_name):
909+        raise IOError, "Django does not allow overwriting files."
910+
911+    try:
912+        file_move(old_file_name, new_file_name)
913+        return
914+    except OSError: # moving to another filesystem
915+        pass
916+
917+    new_file = open(new_file_name, 'wb')
918+    # exclusive lock
919+    file_locks.lock(new_file, file_locks.LOCK_EX)
920+    old_file = open(old_file_name, 'rb')
921+    current_chunk = None
922+
923+    while current_chunk != '':
924+        current_chunk = old_file.read(chunk_size)
925+        new_file.write(current_chunk)
926+
927+    new_file.close()
928+    old_file.close()
929+
930+    os.remove(old_file_name)
931+
932Index: tests/modeltests/test_client/views.py
933===================================================================
934--- tests/modeltests/test_client/views.py       (revision 6654)
935+++ tests/modeltests/test_client/views.py       (working copy)
936@@ -47,6 +47,12 @@
937 
938     return HttpResponse(t.render(c))
939 
940+def post_file_view(request):
941+    "A view that expects a multipart post and returns a file in the context"
942+    t = Template('File {{ file.filename }} received', name='POST Template')
943+    c = Context({'file': request.FILES['file_file']})
944+    return HttpResponse(t.render(c))
945+
946 def redirect_view(request):
947     "A view that redirects all requests to the GET view"
948     if request.GET:
949Index: tests/modeltests/test_client/models.py
950===================================================================
951--- tests/modeltests/test_client/models.py      (revision 6654)
952+++ tests/modeltests/test_client/models.py      (working copy)
953@@ -4,7 +4,7 @@
954 
955 The test client is a class that can act like a simple
956 browser for testing purposes.
957
958+
959 It allows the user to compose GET and POST requests, and
960 obtain the response that the server gave to those requests.
961 The server Response objects are annotated with the details
962@@ -80,6 +80,21 @@
963         self.assertEqual(response.template.name, "Book template")
964         self.assertEqual(response.content, "Blink - Malcolm Gladwell")
965 
966+    def test_post_file_view(self):
967+        "POST this python file to a view"
968+        import os, tempfile
969+        from django.conf import settings
970+        file = __file__.replace('.pyc', '.py')
971+        for upload_dir, streaming_size in [(None,512*1000), (tempfile.gettempdir(), 1)]:
972+            settings.FILE_UPLOAD_DIR = upload_dir
973+            settings.STREAMING_MIN_POST_SIZE = streaming_size
974+            post_data = { 'name': file, 'file_file': open(file) }
975+            response = self.client.post('/test_client/post_file_view/', post_data)
976+            self.failUnless('models.py' in response.context['file']['filename'])
977+            self.failUnless(len(response.context['file']['content']) == os.path.getsize(file))
978+            if upload_dir:
979+                self.failUnless(response.context['file']['tmpfilename'])
980+
981     def test_redirect(self):
982         "GET a URL that redirects elsewhere"
983         response = self.client.get('/test_client/redirect_view/')
984Index: tests/modeltests/test_client/urls.py
985===================================================================
986--- tests/modeltests/test_client/urls.py        (revision 6654)
987+++ tests/modeltests/test_client/urls.py        (working copy)
988@@ -5,6 +5,7 @@
989 urlpatterns = patterns('',
990     (r'^get_view/$', views.get_view),
991     (r'^post_view/$', views.post_view),
992+    (r'^post_file_view/$', views.post_file_view),
993     (r'^raw_post_view/$', views.raw_post_view),
994     (r'^redirect_view/$', views.redirect_view),
995     (r'^permanent_redirect_view/$', redirect_to, { 'url': '/test_client/get_view/' }),
996Index: tests/regressiontests/forms/fields.py
997===================================================================
998--- tests/regressiontests/forms/fields.py       (revision 6654)
999+++ tests/regressiontests/forms/fields.py       (working copy)
1000@@ -760,7 +760,7 @@
1001 ...
1002 ValidationError: [u'No file was submitted. Check the encoding type on the form.']
1003 
1004->>> f.clean({'filename': 'name', 'content':None})
1005+>>> f.clean({'filename': 'name', 'content':None, 'content-length': 0})
1006 Traceback (most recent call last):
1007 ...
1008 ValidationError: [u'The submitted file is empty.']
1009@@ -770,7 +770,7 @@
1010 ...
1011 ValidationError: [u'The submitted file is empty.']
1012 
1013->>> type(f.clean({'filename': 'name', 'content':'Some File Content'}))
1014+>>> type(f.clean({'filename': 'name', 'content':'Some File Content', 'content-length': len('Some File Content')}))
1015 <class 'django.newforms.fields.UploadedFile'>
1016 
1017 # URLField ##################################################################
1018Index: tests/regressiontests/forms/forms.py
1019===================================================================
1020--- tests/regressiontests/forms/forms.py        (revision 6654)
1021+++ tests/regressiontests/forms/forms.py        (working copy)
1022@@ -1410,7 +1410,7 @@
1023 >>> print f
1024 <tr><th>File1:</th><td><ul class="errorlist"><li>No file was submitted. Check the encoding type on the form.</li></ul><input type="file" name="file1" /></td></tr>
1025 
1026->>> f = FileForm(data={}, files={'file1': {'filename': 'name', 'content':'some content'}}, auto_id=False)
1027+>>> f = FileForm(data={}, files={'file1': {'filename': 'name', 'content':'some content', 'content-length': len('some content')}}, auto_id=False)
1028 >>> print f
1029 <tr><th>File1:</th><td><input type="file" name="file1" /></td></tr>
1030 >>> f.is_valid()
1031Index: docs/request_response.txt
1032===================================================================
1033--- docs/request_response.txt   (revision 6654)
1034+++ docs/request_response.txt   (working copy)
1035@@ -82,13 +82,25 @@
1036 ``FILES``
1037     A dictionary-like object containing all uploaded files. Each key in
1038     ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
1039-    value in ``FILES`` is a standard Python dictionary with the following three
1040+    value in ``FILES`` is a standard Python dictionary with the following four
1041     keys:
1042 
1043         * ``filename`` -- The name of the uploaded file, as a Python string.
1044         * ``content-type`` -- The content type of the uploaded file.
1045         * ``content`` -- The raw content of the uploaded file.
1046+        * ``content-length`` -- The length of the content in bytes.
1047 
1048+    If streaming file uploads are enabled, two additional keys
1049+    describing the uploaded file will be present:
1050+
1051+       * ``tmpfilename`` -- The filename for the temporary file.
1052+       * ``tmpfile`` -- An open file object for the temporary file.
1053+
1054+    The temporary file will be removed when the request finishes.
1055+
1056+    Note that accessing ``content`` when streaming uploads are enabled
1057+    will read the whole file into memory, which may not be what you want.
1058+
1059     Note that ``FILES`` will only contain data if the request method was POST
1060     and the ``<form>`` that posted to the request had
1061     ``enctype="multipart/form-data"``. Otherwise, ``FILES`` will be a blank
1062Index: docs/settings.txt
1063===================================================================
1064--- docs/settings.txt   (revision 6654)
1065+++ docs/settings.txt   (working copy)
1066@@ -480,6 +480,15 @@
1067 
1068 .. _Testing Django Applications: ../testing/
1069 
1070+FILE_UPLOAD_DIR
1071+---------------
1072+
1073+Default: ``None``
1074+
1075+Path to a directory where temporary files should be written during
1076+file uploads. Leaving this as ``None`` will disable streaming file uploads,
1077+and cause all uploaded files to be stored (temporarily) in memory.
1078+
1079 IGNORABLE_404_ENDS
1080 ------------------
1081 
1082@@ -845,6 +854,16 @@
1083 
1084 .. _site framework docs: ../sites/
1085 
1086+STREAMING_MIN_POST_SIZE
1087+-----------------------
1088+
1089+Default: 524288 (``512*1024``)
1090+
1091+An integer specifying the minimum number of bytes that has to be
1092+received (in a POST) for file upload streaming to take place. Any
1093+request smaller than this will be handled in memory.
1094+Note: ``FILE_UPLOAD_DIR`` has to be defined to enable streaming.
1095+
1096 TEMPLATE_CONTEXT_PROCESSORS
1097 ---------------------------
1098 
1099Index: docs/forms.txt
1100===================================================================
1101--- docs/forms.txt      (revision 6654)
1102+++ docs/forms.txt      (working copy)
1103@@ -475,6 +475,19 @@
1104    new_data = request.POST.copy()
1105    new_data.update(request.FILES)
1106 
1107+Streaming file uploads
1108+----------------------
1109+
1110+File uploads will be read into memory by default. This works fine for
1111+small to medium sized uploads (from 1MB to 100MB depending on your
1112+setup and usage). If you want to support larger uploads, you can enable
1113+upload streaming, where only a small part of the file is held in memory
1114+at any time. To do this, you need to specify the ``FILE_UPLOAD_DIR``
1115+setting (see the settings_ document for more details).
1116+
1117+See `request object`_ for more details about ``request.FILES`` objects
1118+with streaming file uploads enabled.
1119+
1120 Validators
1121 ==========
1122 
1123@@ -698,3 +711,4 @@
1124 .. _`generic views`: ../generic_views/
1125 .. _`models API`: ../model-api/
1126 .. _settings: ../settings/
1127+.. _request object: ../request_response/#httprequest-objects