Django

Code

Ticket #2070: streaming.7092.patch.partial_tests_fix

File streaming.7092.patch.partial_tests_fix, 48.6 kB (added by faheem, 6 months ago)

Slightly modified version of streaming.7092.patch with more tests passing.

Line 
1 diff -r 8f50398714c1 -r ea52e616a876 django/conf/global_settings.py
2 --- a/django/conf/global_settings.py    Fri Feb 08 07:01:23 2008 -0500
3 +++ b/django/conf/global_settings.py    Fri Feb 08 15:41:48 2008 -0500
4 @@ -257,6 +257,16 @@ DEFAULT_TABLESPACE = ''
5  DEFAULT_TABLESPACE = ''
6  DEFAULT_INDEX_TABLESPACE = ''
7  
8 +# The directory to place streamed file uploads. The web server needs write
9 +# permissions on this directory.
10 +# If this is None, streaming uploads are disabled.
11 +FILE_UPLOAD_DIR = None
12 +
13 +# The minimum size of a POST before file uploads are streamed to disk.
14 +# If the POST is smaller than this, the file is kept in memory instead.
15 +# Size is in bytes.
16 +STREAMING_MIN_POST_SIZE = 512 * (2**10)
17 +
18  ##############
19  # MIDDLEWARE #
20  ##############
21 diff -r 8f50398714c1 -r ea52e616a876 django/core/handlers/modpython.py
22 --- a/django/core/handlers/modpython.py Fri Feb 08 07:01:23 2008 -0500
23 +++ b/django/core/handlers/modpython.py Fri Feb 08 15:41:48 2008 -0500
24 @@ -52,7 +52,12 @@ class ModPythonRequest(http.HttpRequest)
25      def _load_post_and_files(self):
26          "Populates self._post and self._files"
27          if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
28 -            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
29 +            self._raw_post_data = None # raw data is not available for streamed multipart messages
30 +            try:
31 +                self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req, self)
32 +            except:
33 +                self._post, self._files = {}, {} # make sure we don't read the input stream again
34 +                raise
35          else:
36              self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
37  
38 @@ -97,20 +102,21 @@ class ModPythonRequest(http.HttpRequest)
39                  'AUTH_TYPE':         self._req.ap_auth_type,
40                  'CONTENT_LENGTH':    self._req.clength, # This may be wrong
41                  'CONTENT_TYPE':      self._req.content_type, # This may be wrong
42 -                'GATEWAY_INTERFACE': 'CGI/1.1',
43 -                'PATH_INFO':         self._req.path_info,
44 -                'PATH_TRANSLATED':   None, # Not supported
45 -                'QUERY_STRING':      self._req.args,
46 -                'REMOTE_ADDR':       self._req.connection.remote_ip,
47 -                'REMOTE_HOST':       None, # DNS lookups not supported
48 -                'REMOTE_IDENT':      self._req.connection.remote_logname,
49 -                'REMOTE_USER':       self._req.user,
50 -                'REQUEST_METHOD':    self._req.method,
51 -                'SCRIPT_NAME':       None, # Not supported
52 -                'SERVER_NAME':       self._req.server.server_hostname,
53 -                'SERVER_PORT':       self._req.server.port,
54 -                'SERVER_PROTOCOL':   self._req.protocol,
55 -                'SERVER_SOFTWARE':   'mod_python'
56 +                'GATEWAY_INTERFACE':  'CGI/1.1',
57 +                'PATH_INFO':          self._req.path_info,
58 +                'PATH_TRANSLATED':    None, # Not supported
59 +                'QUERY_STRING':       self._req.args,
60 +                'REMOTE_ADDR':        self._req.connection.remote_ip,
61 +                'REMOTE_HOST':        None, # DNS lookups not supported
62 +                'REMOTE_IDENT':       self._req.connection.remote_logname,
63 +                'REMOTE_USER':        self._req.user,
64 +                'REQUEST_METHOD':     self._req.method,
65 +                'SCRIPT_NAME':        None, # Not supported
66 +                'SERVER_NAME':        self._req.server.server_hostname,
67 +                'SERVER_PORT':        self._req.server.port,
68 +                'SERVER_PROTOCOL':    self._req.protocol,
69 +                'UPLOAD_PROGRESS_ID': self._get_file_progress_id(),
70 +                'SERVER_SOFTWARE':    'mod_python'
71              }
72              for key, value in self._req.headers_in.items():
73                  key = 'HTTP_' + key.upper().replace('-', '_')
74 @@ -126,6 +132,17 @@ class ModPythonRequest(http.HttpRequest)
75  
76      def _get_method(self):
77          return self.META['REQUEST_METHOD'].upper()
78 +
79 +    def _get_file_progress_id(self):
80 +        """
81 +        Returns the Progress ID of the request,
82 +        usually provided if there is a file upload
83 +        going on.
84 +        Returns ``None`` if no progress ID is specified.
85 +        """
86 +        return self._get_file_progress_from_args(self._req.headers_in,
87 +                                                 self.GET,
88 +                                                 self._req.args)
89  
90      GET = property(_get_get, _set_get)
91      POST = property(_get_post, _set_post)
92 diff -r 8f50398714c1 -r ea52e616a876 django/core/handlers/wsgi.py
93 --- a/django/core/handlers/wsgi.py      Fri Feb 08 07:01:23 2008 -0500
94 +++ b/django/core/handlers/wsgi.py      Fri Feb 08 15:41:48 2008 -0500
95 @@ -77,6 +77,7 @@ class WSGIRequest(http.HttpRequest):
96          self.environ = environ
97          self.path = force_unicode(environ['PATH_INFO'])
98          self.META = environ
99 +        self.META['UPLOAD_PROGRESS_ID'] = self._get_file_progress_id()
100          self.method = environ['REQUEST_METHOD'].upper()
101  
102      def __repr__(self):
103 @@ -114,7 +115,14 @@ class WSGIRequest(http.HttpRequest):
104              if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
105                  header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
106                  header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
107 -                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
108 +                header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '')
109 +                header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '')
110 +                try:
111 +                    self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'], self)
112 +                except:
113 +                    self._post, self._files = {}, {} # make sure we don't read the input stream again
114 +                    raise
115 +                self._raw_post_data = None # raw data is not available for streamed multipart messages
116              else:
117                  self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
118          else:
119 @@ -172,6 +180,17 @@ class WSGIRequest(http.HttpRequest):
120              buf.close()
121              return self._raw_post_data
122  
123 +    def _get_file_progress_id(self):
124 +        """
125 +        Returns the Progress ID of the request,
126 +        usually provided if there is a file upload
127 +        going on.
128 +        Returns ``None`` if no progress ID is specified.
129 +        """
130 +        return self._get_file_progress_from_args(self.environ,
131 +                                                 self.GET,
132 +                                                 self.environ.get('QUERY_STRING', ''))
133 +
134      GET = property(_get_get, _set_get)
135      POST = property(_get_post, _set_post)
136      COOKIES = property(_get_cookies, _set_cookies)
137 diff -r 8f50398714c1 -r ea52e616a876 django/core/validators.py
138 --- a/django/core/validators.py Fri Feb 08 07:01:23 2008 -0500
139 +++ b/django/core/validators.py Fri Feb 08 15:41:48 2008 -0500
140 @@ -177,17 +177,17 @@ def isValidImage(field_data, all_data):
141      from PIL import Image
142      from cStringIO import StringIO
143      try:
144 -        content = field_data['content']
145 +        filename = field_data['filename']
146      except TypeError:
147          raise ValidationError, _("No file was submitted. Check the encoding type on the form.")
148      try:
149          # load() is the only method that can spot a truncated JPEG,
150          #  but it cannot be called sanely after verify()
151 -        trial_image = Image.open(StringIO(content))
152 +        trial_image = Image.open(field_data.get('tmpfilename') or StringIO(field_data.get('content','')))
153          trial_image.load()
154          # verify() is the only method that can spot a corrupt PNG,
155          #  but it must be called immediately after the constructor
156 -        trial_image = Image.open(StringIO(content))
157 +        trial_image = Image.open(field_data.get('tmpfilename') or StringIO(field_data.get('content','')))
158          trial_image.verify()
159      except Exception: # Python Imaging Library doesn't recognize it as an image
160          raise ValidationError, _("Upload a valid image. The file you uploaded was either not an image or a corrupted image.")
161 diff -r 8f50398714c1 -r ea52e616a876 django/db/models/base.py
162 --- a/django/db/models/base.py  Fri Feb 08 07:01:23 2008 -0500
163 +++ b/django/db/models/base.py  Fri Feb 08 15:41:48 2008 -0500
164 @@ -12,6 +12,7 @@ from django.dispatch import dispatcher
165  from django.dispatch import dispatcher
166  from django.utils.datastructures import SortedDict
167  from django.utils.functional import curry
168 +from django.utils.file import file_move_safe
169  from django.utils.encoding import smart_str, force_unicode, smart_unicode
170  from django.conf import settings
171  from itertools import izip
172 @@ -379,12 +380,16 @@ class Model(object):
173      def _get_FIELD_size(self, field):
174          return os.path.getsize(self._get_FIELD_filename(field))
175  
176 -    def _save_FIELD_file(self, field, filename, raw_contents, save=True):
177 +    def _save_FIELD_file(self, field, filename, raw_field, save=True):
178          directory = field.get_directory_name()
179          try: # Create the date-based directory if it doesn't exist.
180              os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
181          except OSError: # Directory probably already exists.
182              pass
183 +
184 +        if filename is None:
185 +            filename = raw_field['filename']
186 +
187          filename = field.get_filename(filename)
188  
189          # If the filename already exists, keep adding an underscore to the name of
190 @@ -401,9 +406,16 @@ class Model(object):
191          setattr(self, field.attname, filename)
192  
193          full_filename = self._get_FIELD_filename(field)
194 -        fp = open(full_filename, 'wb')
195 -        fp.write(raw_contents)
196 -        fp.close()
197 +        if raw_field.has_key('tmpfilename'):
198 +            raw_field['tmpfile'].close()
199 +            file_move_safe(raw_field['tmpfilename'], full_filename)
200 +        else:
201 +            from django.utils import file_locks
202 +            fp = open(full_filename, 'wb')
203 +            # exclusive lock
204 +            file_locks.lock(fp, file_locks.LOCK_EX)
205 +            fp.write(raw_field['content'])
206 +            fp.close()
207  
208          # Save the width and/or height, if applicable.
209          if isinstance(field, ImageField) and (field.width_field or field.height_field):
210 diff -r 8f50398714c1 -r ea52e616a876 django/db/models/fields/__init__.py
211 --- a/django/db/models/fields/__init__.py       Fri Feb 08 07:01:23 2008 -0500
212 +++ b/django/db/models/fields/__init__.py       Fri Feb 08 15:41:48 2008 -0500
213 @@ -761,7 +761,8 @@ class FileField(Field):
214          setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
215          setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
216          setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
217 -        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
218 +        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
219 +        setattr(cls, 'move_%s_file' % self.name, lambda instance, raw_field, save=True: instance._save_FIELD_file(self, None, raw_field, save))       
220          dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
221  
222      def delete_file(self, instance):
223 @@ -784,9 +785,9 @@ class FileField(Field):
224          if new_data.get(upload_field_name, False):
225              func = getattr(new_object, 'save_%s_file' % self.name)
226              if rel:
227 -                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
228 +                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save)
229              else:
230 -                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
231 +                func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save)
232  
233      def get_directory_name(self):
234          return os.path.normpath(force_unicode(datetime.datetime.now().strftime(smart_str(self.upload_to))))
235 @@ -799,7 +800,7 @@ class FileField(Field):
236      def save_form_data(self, instance, data):
237          from django.newforms.fields import UploadedFile
238          if data and isinstance(data, UploadedFile):
239 -            getattr(instance, "save_%s_file" % self.name)(data.filename, data.content, save=False)
240 +            getattr(instance, "save_%s_file" % self.name)(data.filename, data.data, save=False)
241  
242      def formfield(self, **kwargs):
243          defaults = {'form_class': forms.FileField}
244 diff -r 8f50398714c1 -r ea52e616a876 django/http/__init__.py
245 --- a/django/http/__init__.py   Fri Feb 08 07:01:23 2008 -0500
246 +++ b/django/http/__init__.py   Fri Feb 08 15:41:48 2008 -0500
247 @@ -1,11 +1,16 @@ import os
248  import os
249 +import re
250  from Cookie import SimpleCookie
251  from pprint import pformat
252  from urllib import urlencode
253  from urlparse import urljoin
254 +from django.http.utils import str_to_unicode
255 +from django.http.multipartparser import MultiPartParser, MultiPartParserError
256  from django.utils.datastructures import MultiValueDict, FileDict
257  from django.utils.encoding import smart_str, iri_to_uri, force_unicode
258  from utils import *
259 +
260 +upload_id_re = re.compile(r'^[a-fA-F0-9]{32}$') # file progress id Regular expression
261  
262  RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
263  
264 @@ -79,7 +84,7 @@ class HttpRequest(object):
265  
266      def is_secure(self):
267          return os.environ.get("HTTPS") == "on"
268 -
269 +       
270      def _set_encoding(self, val):
271          """
272          Sets the encoding used for GET/POST accesses. If the GET or POST
273 @@ -97,38 +102,54 @@ class HttpRequest(object):
274  
275      encoding = property(_get_encoding, _set_encoding)
276  
277 -def parse_file_upload(header_dict, post_data):
278 -    "Returns a tuple of (POST QueryDict, FILES MultiValueDict)"
279 -    import email, email.Message
280 -    from cgi import parse_header
281 -    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
282 -    raw_message += '\r\n\r\n' + post_data
283 -    msg = email.message_from_string(raw_message)
284 -    POST = QueryDict('', mutable=True)
285 -    FILES = MultiValueDict()
286 -    for submessage in msg.get_payload():
287 -        if submessage and isinstance(submessage, email.Message.Message):
288 -            name_dict = parse_header(submessage['Content-Disposition'])[1]
289 -            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
290 -            # or {'name': 'blah'} for POST fields
291 -            # We assume all uploaded files have a 'filename' set.
292 -            if 'filename' in name_dict:
293 -                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
294 -                if not name_dict['filename'].strip():
295 -                    continue
296 -                # IE submits the full path, so trim everything but the basename.
297 -                # (We can't use os.path.basename because that uses the server's
298 -                # directory separator, which may not be the same as the
299 -                # client's one.)
300 -                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
301 -                FILES.appendlist(name_dict['name'], FileDict({
302 -                    'filename': filename,
303 -                    'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
304 -                    'content': submessage.get_payload(),
305 -                }))
306 -            else:
307 -                POST.appendlist(name_dict['name'], submessage.get_payload())
308 -    return POST, FILES
309 +    def _get_file_progress(self):
310 +        return {}
311 +   
312 +    def _set_file_progress(self,value):
313 +        pass
314 +
315 +    def _del_file_progress(self):
316 +        pass
317 +
318 +    file_progress = property(_get_file_progress,
319 +                             _set_file_progress,
320 +                             _del_file_progress)
321 +
322 +    def _get_file_progress_from_args(self, headers, get, querystring):
323 +        """
324 +        This parses the request for a file progress_id value.
325 +        Note that there are two distinct ways of getting the progress
326 +        ID -- header and GET. One is used primarily to attach via JavaScript
327 +        to the end of an HTML form action while the other is used for AJAX
328 +        communication.
329 +
330 +        All progress IDs must be valid 32-digit hexadecimal numbers.
331 +        """
332 +        if 'X-Upload-ID' in headers:
333 +            progress_id = headers['X-Upload-ID']
334 +        elif 'progress_id' in get:
335 +            progress_id = get['progress_id']
336 +        else:
337 +            return None
338 +
339 +        if not upload_id_re.match(progress_id):
340 +            return None
341 +
342 +        return progress_id
343 +
344 +def parse_file_upload(headers, input, request):
345 +    from django.conf import settings
346 +
347 +    # Only stream files to disk if FILE_UPLOAD_DIR is set
348 +    file_upload_dir = settings.FILE_UPLOAD_DIR
349 +    streaming_min_post_size = settings.STREAMING_MIN_POST_SIZE
350 +
351 +    try:
352 +        parser = MultiPartParser(headers, input, request, file_upload_dir, streaming_min_post_size)
353 +        return parser.parse()
354 +    except MultiPartParserError, e:
355 +        return MultiValueDict({ '_file_upload_error': [e.message] }), {}
356 +
357  
358  class QueryDict(MultiValueDict):
359      """
360 @@ -413,20 +434,3 @@ class HttpResponseServerError(HttpRespon
361  # A backwards compatible alias for HttpRequest.get_host.
362  def get_host(request):
363      return request.get_host()
364 -
365 -# It's neither necessary nor appropriate to use
366 -# django.utils.encoding.smart_unicode for parsing URLs and form inputs. Thus,
367 -# this slightly more restricted function.
368 -def str_to_unicode(s, encoding):
369 -    """
370 -    Convert basestring objects to unicode, using the given encoding. Illegaly
371 -    encoded input characters are replaced with Unicode "unknown" codepoint
372 -    (\ufffd).
373 -
374 -    Returns any non-basestring objects without change.
375 -    """
376 -    if isinstance(s, str):
377 -        return unicode(s, encoding, 'replace')
378 -    else:
379 -        return s
380 -
381 diff -r 8f50398714c1 -r ea52e616a876 django/http/multipartparser.py
382 --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
383 +++ b/django/http/multipartparser.py    Fri Feb 08 15:41:48 2008 -0500
384 @@ -0,0 +1,328 @@
385 +"""
386 +MultiPart parsing for file uploads.
387 +If both a progress id is sent (either through ``X-Progress-ID``
388 +header or ``progress_id`` GET) and ``FILE_UPLOAD_DIR`` is set
389 +in the settings, then the file progress will be tracked using
390 +``request.file_progress``.
391 +
392 +To use this feature, consider creating a middleware with an appropriate
393 +``process_request``::
394 +
395 +    class FileProgressTrack(object):
396 +        def __get__(self, request, HttpRequest):
397 +            progress_id = request.META['UPLOAD_PROGRESS_ID']
398 +            status = # get progress from progress_id here
399 +
400 +            return status
401 +
402 +        def __set__(self, request, new_value):
403 +            progress_id = request.META['UPLOAD_PROGRESS_ID']
404 +
405 +            # set the progress using progress_id here.
406 +
407 +    # example middleware
408 +    class FileProgressExample(object):
409 +        def process_request(self, request):
410 +            request.__class__.file_progress = FileProgressTrack()
411 +
412 +
413 +
414 +"""
415 +
416 +__all__ = ['MultiPartParserError','MultiPartParser']
417 +
418 +
419 +from django.utils.datastructures import MultiValueDict
420 +from django.http.utils import str_to_unicode
421 +from django.conf import settings
422 +import os
423 +
424 +try:
425 +    from cStringIO import StringIO
426 +except ImportError:
427 +    from StringIO import StringIO
428 +
429 +
430 +class MultiPartParserError(Exception):
431 +    def __init__(self, message):
432 +        self.message = message
433 +    def __str__(self):
434 +        return repr(self.message)
435 +
436 +class MultiPartParser(object):
437 +    """
438 +    An RFC 2388 multipart/form-data parser.
439 +   
440 +    parse() reads the input stream in chunk_size chunks and returns a
441 +    tuple of (POST MultiValueDict, FILES MultiValueDict). If
442 +    file_upload_dir is defined files will be streamed to temporary
443 +    files in the specified directory.
444 +
445 +    The FILES dictionary will have 'filename', 'content-type',
446 +    'content' and 'content-length' entries. For streamed files it will
447 +    also have 'tmpfilename' and 'tmpfile'. The 'content' entry will
448 +    only be read from disk when referenced for streamed files.
449 +
450 +    If the X-Progress-ID is sent (in one of many formats), then
451 +    object.file_progress will be given a dictionary of the progress.
452 +    """
453 +    def __init__(self, headers, input, request, file_upload_dir=None, streaming_min_post_size=None, chunk_size=1024*64):
454 +        try:
455 +            content_length = int(headers['Content-Length'])
456 +        except:
457 +            raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length'))
458 +
459 +        content_type = headers.get('Content-Type')
460 +
461 +        if not content_type or not content_type.startswith('multipart/'):
462 +            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
463 +           
464 +        ctype, opts = self.parse_header(content_type)
465 +        boundary = opts.get('boundary')
466 +        from cgi import valid_boundary
467 +        if not boundary or not valid_boundary(boundary):
468 +            raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary)
469 +
470 +        progress_id = request.META['UPLOAD_PROGRESS_ID']
471 +
472 +        self._track_progress = file_upload_dir and progress_id # whether or not to track progress
473 +        self._boundary = '--' + boundary
474 +        self._input = input
475 +        self._size = content_length
476 +        self._received = 0
477 +        self._file_upload_dir = file_upload_dir
478 +        self._chunk_size = chunk_size
479 +        self._state = 'PREAMBLE'
480 +        self._partial = ''
481 +        self._post = MultiValueDict()
482 +        self._files = MultiValueDict()
483 +        self._request = request
484 +        self._encoding = request.encoding or settings.DEFAULT_CHARSET
485 +
486 +        if streaming_min_post_size is not None and content_length < streaming_min_post_size:
487 +            self._file_upload_dir = None # disable file streaming for small request
488 +        elif self._track_progress:
489 +            request.file_progress = {'state': 'starting'}
490 +
491 +        try:
492 +            # Use mx fast string search if available.
493 +            from mx.TextTools import FS
494 +            self._fs = FS(self._boundary)
495 +        except ImportError:
496 +            self._fs = None
497 +
498 +    def parse(self):
499 +        try:
500 +            self._parse()
501 +        finally:
502 +            if self._track_progress:
503 +                self._request.file_progress = {'state': 'done'}
504 +        return self._post, self._files
505 +
506 +    def _parse(self):
507 +        size = self._size
508 +
509 +        try:
510 +            while size > 0:
511 +                n = self._read(self._input, min(self._chunk_size, size))
512 +                if not n:
513 +                    break
514 +                size -= n
515 +        except:
516 +            # consume any remaining data so we don't generate a "Connection Reset" error
517 +            size = self._size - self._received
518 +            while size > 0:
519 +                data = self._input.read(min(self._chunk_size, size))
520 +                size -= len(data)
521 +            raise
522 +
523 +    def _find_boundary(self, data, start, stop):
524 +        """
525 +        Find the next boundary and return the end of current part
526 +        and start of next part.
527 +        """
528 +        if self._fs:
529 +            boundary = self._fs.find(data, start, stop)
530 +        else:
531 +            boundary = data.find(self._boundary, start, stop)
532 +        if boundary >= 0:
533 +            end = boundary
534 +            next = boundary + len(self._boundary)
535 +
536 +            # backup over CRLF
537 +            if end > 0 and data[end-1] == '\n': end -= 1
538 +            if end > 0 and data[end-1] == '\r': end -= 1
539 +            # skip over --CRLF
540 +            if next < stop and data[next] == '-': next += 1
541 +            if next < stop and data[next] == '-': next += 1
542 +            if next < stop and data[next] == '\r': next += 1
543 +            if next < stop and data[next] == '\n': next += 1
544 +
545 +            return True, end, next
546 +        else:
547 +            return False, stop, stop
548 +
549 +    class TemporaryFile(object):
550 +        "A temporary file that tries to delete itself when garbage collected."
551 +        def __init__(self, dir):
552 +            import tempfile
553 +            (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
554 +            self.file = os.fdopen(fd, 'w+b')
555 +            self.name = name
556 +
557 +        def __getattr__(self, name):
558 +            a = getattr(self.__dict__['file'], name)
559 +            if type(a) != type(0):
560 +                setattr(self, name, a)
561 +            return a
562 +
563 +        def __del__(self):
564 +            try:
565 +                os.unlink(self.name)
566 +            except OSError:
567 +                pass
568 +
569 +    class LazyContent(dict):
570 +        """
571 +        A lazy FILES dictionary entry that reads the contents from
572 +        tmpfile only when referenced.
573 +        """
574 +        def __init__(self, data):
575 +            dict.__init__(self, data)
576 +
577 +        def __getitem__(self, key):
578 +            if key == 'content' and not self.has_key(key):
579 +                self['tmpfile'].seek(0)
580 +                self['content'] = self['tmpfile'].read()
581 +            return dict.__getitem__(self, key)
582 +
583 +    def _read(self, input, size):
584 +        data = input.read(size)
585 +
586 +        if not data:
587 +            return 0
588 +
589 +        read_size = len(data)
590 +        self._received += read_size
591 +
592 +        if self._partial:
593 +            data = self._partial + data
594 +
595 +        start = 0
596 +        stop = len(data)
597 +
598 +        while start < stop:
599 +            boundary, end, next = self._find_boundary(data, start, stop)
600 +
601 +            if not boundary and read_size:
602 +                # make sure we don't treat a partial boundary (and its separators) as data
603 +                stop -= len(self._boundary) + 16
604 +                end = next = stop
605 +                if end <= start:
606 +                    break # need more data
607 +
608 +            if self._state == 'PREAMBLE':
609 +                # Preamble, just ignore it
610 +                self._state = 'HEADER'
611 +
612 +            elif self._state == 'HEADER':
613 +                # Beginning of header, look for end of header and parse it if found.
614 +
615 +                header_end = data.find('\r\n\r\n', start, stop)
616 +                if header_end == -1:
617 +                    break # need more data
618 +
619 +                header = data[start:header_end]
620 +
621 +                self._fieldname = None
622 +                self._filename = None
623 +                self._content_type = None
624 +
625 +                for line in header.split('\r\n'):
626 +                    ctype, opts = self.parse_header(line)
627 +                    if ctype == 'content-disposition: form-data':
628 +                        self._fieldname = opts.get('name')
629 +                        self._filename = opts.get('filename')
630 +                    elif ctype.startswith('content-type: '):
631 +                        self._content_type = ctype[14:]
632 +
633 +                if self._filename is not None:
634 +                    # cleanup filename from IE full paths:
635 +                    self._filename = self._filename[self._filename.rfind("\\")+1:].strip()
636 +
637 +                    if self._filename: # ignore files without filenames
638 +                        if self._file_upload_dir:
639 +                            try:
640 +                                self._file = self.TemporaryFile(dir=self._file_upload_dir)
641 +                            except (OSError, IOError), e:
642 +                                raise MultiPartParserError("Failed to create temporary file. Error was %s" % e)
643 +                        else:
644 +                            self._file = StringIO()
645 +                    else:
646 +                        self._file = None
647 +                    self._filesize = 0
648 +                    self._state = 'FILE'
649 +                else:
650 +                    self._field = StringIO()
651 +                    self._state = 'FIELD'
652 +                next = header_end + 4
653 +
654 +            elif self._state == 'FIELD':
655 +                # In a field, collect data until a boundary is found.
656 +
657 +                self._field.write(data[start:end])
658 +                if boundary:
659 +                    if self._fieldname:
660 +                        self._post.appendlist(self._fieldname, str_to_unicode(self._field.getvalue(), self._encoding))
661 +                    self._field.close()
662 +                    self._state = 'HEADER'
663 +
664 +            elif self._state == 'FILE':
665 +                # In a file, collect data until a boundary is found.
666 +
667 +                if self._file:
668 +                    try:
669 +                        self._file.write(data[start:end])
670 +                    except IOError, e:
671 +                        raise MultiPartParserError("Failed to write to temporary file.")
672 +                    self._filesize += end-start
673 +
674 +                    if self._track_progress:
675 +                        self._request.file_progress = {'received': self._received,
676 +                                                       'size':     self._size,
677 +                                                       'state':    'uploading'}
678 +
679 +                if boundary:
680 +                    if self._file:
681 +                        if self._file_upload_dir:
682 +                            self._file.seek(0)
683 +                            file = self.LazyContent({
684 +                                'filename': str_to_unicode(self._filename, self._encoding),
685 +                                'content-type':  self._content_type,
686 +                                # 'content': is read on demand
687 +                                'content-length': self._filesize,
688 +                                'tmpfilename': self._file.name,
689 +                                'tmpfile': self._file
690 +                            })
691 +                        else:
692 +                            file = {
693 +                                'filename': str_to_unicode(self._filename, self._encoding),
694 +                                'content-type':  self._content_type,
695 +                                'content': self._file.getvalue(),
696 +                                'content-length': self._filesize
697 +                            }
698 +                            self._file.close()
699 +
700 +                        self._files.appendlist(self._fieldname, file)
701 +
702 +                    self._state = 'HEADER'
703 +
704 +            start = next
705 +
706 +        self._partial = data[start:]
707 +
708 +        return read_size
709 +
710 +    def parse_header(self, line):
711 +        from cgi import parse_header
712 +        return parse_header(line)
713 diff -r 8f50398714c1 -r ea52e616a876 django/http/utils.py
714 --- a/django/http/utils.py      Fri Feb 08 07:01:23 2008 -0500
715 +++ b/django/http/utils.py      Fri Feb 08 15:41:48 2008 -0500
716 @@ -1,3 +1,19 @@
717 +# It's neither necessary nor appropriate to use
718 +# django.utils.encoding.smart_unicode for parsing URLs and form inputs. Thus,
719 +# this slightly more restricted function.
720 +def str_to_unicode(s, encoding):
721 +    """
722 +    Convert basestring objects to unicode, using the given encoding. Illegally
723 +    encoded input characters are replaced with Unicode "unknown" codepoint
724 +    (\ufffd).
725 +
726 +    Returns any non-basestring objects without change.
727 +    """
728 +    if isinstance(s, str):
729 +        return unicode(s, encoding, 'replace')
730 +    else:
731 +        return s
732 +
733  """
734  Functions that modify an HTTP request or response in some way.
735  """
736 diff -r 8f50398714c1 -r ea52e616a876 django/newforms/fields.py
737 --- a/django/newforms/fields.py Fri Feb 08 07:01:23 2008 -0500
738 +++ b/django/newforms/fields.py Fri Feb 08 15:41:48 2008 -0500
739 @@ -415,9 +415,9 @@ except ImportError:
740  
741  class UploadedFile(StrAndUnicode):
742      "A wrapper for files uploaded in a FileField"
743 -    def __init__(self, filename, content):
744 +    def __init__(self, filename, data):
745          self.filename = filename
746 -        self.content = content
747 +        self.data = data
748  
749      def __unicode__(self):
750          """
751 @@ -444,12 +444,12 @@ class FileField(Field):
752          elif not data and initial:
753              return initial
754          try:
755 -            f = UploadedFile(data['filename'], data['content'])
756 +            f = UploadedFile(data['filename'], data)
757          except TypeError:
758              raise ValidationError(self.error_messages['invalid'])
759          except KeyError:
760              raise ValidationError(self.error_messages['missing'])
761 -        if not f.content:
762 +        if not f.data.get('content-length'):
763              raise ValidationError(self.error_messages['empty'])
764          return f
765  
766 @@ -473,11 +473,11 @@ class ImageField(FileField):
767          try:
768              # load() is the only method that can spot a truncated JPEG,
769              #  but it cannot be called sanely after verify()
770 -            trial_image = Image.open(StringIO(f.content))
771 +            trial_image = Image.open(f.data.get('tmpfilename') or StringIO(f.data['content']))
772              trial_image.load()
773              # verify() is the only method that can spot a corrupt PNG,
774              #  but it must be called immediately after the constructor
775 -            trial_image = Image.open(StringIO(f.content))
776 +            trial_image = Image.open(f.data.get('tmpfilename') or StringIO(f.data['content']))
777              trial_image.verify()
778          except Exception: # Python Imaging Library doesn't recognize it as an image
779              raise ValidationError(self.error_messages['invalid_image'])
780 diff -r 8f50398714c1 -r ea52e616a876 django/oldforms/__init__.py
781 --- a/django/oldforms/__init__.py       Fri Feb 08 07:01:23 2008 -0500
782 +++ b/django/oldforms/__init__.py       Fri Feb 08 15:41:48 2008 -0500
783 @@ -681,16 +681,21 @@ class FileUploadField(FormField):
784          self.validator_list = [self.isNonEmptyFile] + validator_list
785  
786      def isNonEmptyFile(self, field_data, all_data):
787 -        try:
788 -            content = field_data['content']
789 -        except TypeError:
790 +        if field_data.has_key('_file_upload_error'):
791 +            raise validators.CriticalValidationError, field_data['_file_upload_error']
792 +        if not field_data.has_key('filename'):
793              raise validators.CriticalValidationError, ugettext("No file was submitted. Check the encoding type on the form.")
794 -        if not content:
795 +        if not field_data['content-length']:
796              raise validators.CriticalValidationError, ugettext("The submitted file is empty.")
797  
798      def render(self, data):
799          return mark_safe(u'<input type="file" id="%s" class="v%s" name="%s" />' % \
800              (self.get_id(), self.__class__.__name__, self.field_name))
801 +
802 +    def prepare(self, new_data):
803 +        if new_data.has_key('_file_upload_error'):
804 +            # pretend we got something in the field to raise a validation error later
805 +            new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] }
806  
807      def html2python(data):
808          if data is None:
809 diff -r 8f50398714c1 -r ea52e616a876 django/utils/file.py
810 --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
811 +++ b/django/utils/file.py      Fri Feb 08 15:41:48 2008 -0500
812 @@ -0,0 +1,53 @@
813 +import os
814 +
815 +__all__ = ['file_move_safe']
816 +
817 +try:
818 +    import shutil
819 +    file_move = shutil.move
820 +except ImportError:
821 +    file_move = os.rename
822 +
823 +def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64, allow_overwrite=False):
824 +    """
825 +    Moves a file from one location to another in the safest way possible.
826 +   
827 +    First, it tries using shutil.move, which is OS-dependent but doesn't
828 +    break with change of filesystems. Then it tries os.rename, which will
829 +    break if it encounters a change in filesystems. Lastly, it streams
830 +    it manually from one file to another in python.
831 +
832 +    Without ``allow_overwrite``, an IOError is raised if the destination
833 +    file already exists.
834 +    """
835 +
836 +    from django.utils import file_locks
837 +
838 +    if old_file_name == new_file_name:
839 +        # No file moving takes place.
840 +        return
841 +
842 +    if not allow_overwrite and os.path.exists(new_file_name):
843 +        raise IOError, "Django does not allow overwriting files."
844 +
845 +    try:
846 +        file_move(old_file_name, new_file_name)
847 +        return
848 +    except OSError: # moving to another filesystem
849 +        pass
850 +
851 +    new_file = open(new_file_name, 'wb')
852 +    # exclusive lock
853 +    file_locks.lock(new_file, file_locks.LOCK_EX)
854 +    old_file = open(old_file_name, 'rb')
855 +    current_chunk = None
856 +
857 +    while current_chunk != '':
858 +        current_chunk = old_file.read(chunk_size)
859 +        new_file.write(current_chunk)
860 +
861 +    new_file.close()
862 +    old_file.close()
863 +
864 +    os.remove(old_file_name)
865 +
866 diff -r 8f50398714c1 -r ea52e616a876 django/utils/file_locks.py
867 --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
868 +++ b/django/utils/file_locks.py        Fri Feb 08 15:41:48 2008 -0500
869 @@ -0,0 +1,50 @@
870 +"""
871 +Locking portability by Jonathan Feinberg <jdf@pobox.com> in the Python Cookbook
872 +
873 +Example Usage::
874 +
875 +    from django.utils import file_locks
876 +
877 +    f = open('./file', 'wb')
878 +
879 +    file_locks.lock(f, file_locks.LOCK_EX)
880 +    f.write('Django')
881 +    f.close()
882 +"""
883 +
884 +
885 +import os
886 +
887 +__all__ = ['LOCK_EX','LOCK_SH','LOCK_NB','lock','unlock']
888 +
889 +if os.name == 'nt':
890 +       import win32con
891 +       import win32file
892 +       import pywintypes
893 +       LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
894 +       LOCK_SH = 0
895 +       LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
896 +       __overlapped = pywintypes.OVERLAPPED()
897 +elif os.name == 'posix':
898 +       import fcntl
899 +       LOCK_EX = fcntl.LOCK_EX
900 +       LOCK_SH = fcntl.LOCK_SH
901 +       LOCK_NB = fcntl.LOCK_NB
902 +else:
903 +       raise RuntimeError("Locking only defined for nt and posix platforms")
904 +
905 +if os.name == 'nt':
906 +       def lock(file, flags):
907 +               hfile = win32file._get_osfhandle(file.fileno())
908 +               win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped)
909 +
910 +       def unlock(file):
911 +               hfile = win32file._get_osfhandle(file.fileno())
912 +               win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped)
913 +
914 +elif os.name =='posix':
915 +       def lock(file, flags):
916 +               fcntl.flock(file.fileno(), flags)
917 +
918 +       def unlock(file):
919 +               fcntl.flock(file.fileno(), fcntl.LOCK_UN)
920 diff -r 8f50398714c1 -r ea52e616a876 docs/forms.txt
921 --- a/docs/forms.txt    Fri Feb 08 07:01:23 2008 -0500
922 +++ b/docs/forms.txt    Fri Feb 08 15:41:48 2008 -0500
923 @@ -475,6 +475,19 @@ this::
924     new_data = request.POST.copy()
925     new_data.update(request.FILES)
926  
927 +Streaming file uploads
928 +----------------------
929 +
930 +File uploads will be read into memory by default. This works fine for
931 +small to medium-sized uploads (from 1MB to 100MB depending on your
932 +setup and usage). If you want to support larger uploads, you can enable
933 +upload streaming, where only a small part of the file will be in memory
934 +at any time. To do this you need to specify the ``FILE_UPLOAD_DIR``
935 +setting (see the settings_ document for more details).
936 +
937 +See `request object`_ for more details about ``request.FILES`` objects
938 +with streaming file uploads enabled.
939 +
940  Validators
941  ==========
942  
943 @@ -698,3 +711,4 @@ fails. If no message is passed in, a def
944  .. _`generic views`: ../generic_views/
945  .. _`models API`: ../model-api/
946  .. _settings: ../settings/
947 +.. _request object: ../request_response/#httprequest-objects
948 diff -r 8f50398714c1 -r ea52e616a876 docs/request_response.txt
949 --- a/docs/request_response.txt Fri Feb 08 07:01:23 2008 -0500
950 +++ b/docs/request_response.txt Fri Feb 08 15:41:48 2008 -0500
951 @@ -82,12 +82,24 @@ All attributes except ``session`` should
952  ``FILES``
953      A dictionary-like object containing all uploaded files. Each key in
954      ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
955 -    value in ``FILES`` is a standard Python dictionary with the following three
956 +    value in ``FILES`` is a standard Python dictionary with the following four
957      keys:
958  
959          * ``filename`` -- The name of the uploaded file, as a Python string.
960          * ``content-type`` -- The content type of the uploaded file.
961          * ``content`` -- The raw content of the uploaded file.
962 +        * ``content-length`` -- The length of the content in bytes.
963 +
964 +    If streaming file uploads are enabled, two additional keys
965 +    describing the uploaded file will be present:
966 +
967 +       * ``tmpfilename`` -- The filename for the temporary file.
968 +       * ``tmpfile`` -- An open file object for the temporary file.
969 +
970 +    The temporary file will be removed when the request finishes.
971 +
972 +    Note that accessing