Context Navigation

Back to Ticket #2070

Ticket #2070: 2070_revision7339_uploadhandling.diff

File 2070_revision7339_uploadhandling.diff, 38.8 KB (added by Michael Axiak, 18 years ago)
NEW Upload handling for revision 7339

django/http/uploadedfile.py

+"""
+The uploaded file objects for Django.
+This contains the base UploadedFile and the TemporaryUploadedFile
+and InMemoryUploadedFile derived classes.
+"""
+__all__ = ('UploadedFile', 'TemporaryUploadedFile', 'InMemoryUploadedFile')
+class UploadedFile(object):
+    """
+    Abstract base for data a user submitted through a file upload.
+
+    The object behaves somewhat like a file: subclasses provide read()
+    (and optionally seek()), and this class layers chunked iteration and
+    the deprecated dictionary-style access on top of that.
+
+    ``file_size`` is a plain attribute. The accessor method that used to
+    share its name was always shadowed by the attribute assigned in
+    __init__(), so calling it could never work; it has been dropped.
+    """
+    DEFAULT_CHUNK_SIZE = 64 * 2**10
+    # Class-level defaults so the attributes exist even on subclasses whose
+    # __init__ does not chain up to UploadedFile.__init__().
+    file_size = None
+    file_name = None
+    content_type = None
+    charset = None
+    def __init__(self):
+        self.file_size = None
+        self.file_name = None
+        self.content_type = None
+        self.charset = None
+    def chunk(self, chunk_size=None):
+        """
+        Read the file and generate pieces of at most chunk_size bytes.
+
+        If the object has a seek() method the file is rewound first. When
+        file_size is unknown (None) the file is read until read() returns
+        no more data.
+        """
+        if not chunk_size:
+            chunk_size = self.DEFAULT_CHUNK_SIZE
+        if hasattr(self, 'seek'):
+            self.seek(0)
+        remaining = self.file_size
+        if remaining is None:
+            # Size unknown: keep reading until the source runs dry.
+            data = self.read(chunk_size)
+            while data:
+                yield data
+                data = self.read(chunk_size)
+            return
+        while remaining > 0:
+            yield self.read(chunk_size)
+            remaining -= chunk_size
+    def multiple_chunks(self, chunk_size=None):
+        """
+        Return True if you can expect multiple chunks, False otherwise.
+
+        A file needs several chunks when it is larger than chunk_size. If
+        file_size is unknown, True is returned so callers stream the data
+        instead of assuming it fits in one chunk.
+        Note: If a particular file representation is in memory, then
+              override this to return False.
+        """
+        if not chunk_size:
+            chunk_size = self.DEFAULT_CHUNK_SIZE
+        size = self.file_size
+        if size is None:
+            return True
+        return size > chunk_size
+    def read(self, num_bytes=None):
+        """
+        Read from the file in whatever representation it has.
+
+        Subclasses must override this; the base raises NotImplementedError.
+        """
+        raise NotImplementedError()
+    def open(self):
+        """
+        Open the file, if one needs to.
+        """
+        pass
+    def close(self):
+        """
+        Close the file, if one needs to.
+        """
+        pass
+    def __getitem__(self, key):
+        """
+        Emulate the old dictionary interface of uploaded files.
+
+        'content' returns the result of read(); 'filename' and
+        'content-type' map to the file_name and content_type attributes;
+        any other key is looked up as an attribute of the same name.
+        A DeprecationWarning is issued on every access.
+        """
+        import warnings
+        warnings.warn("The dictionary access of uploaded file objects is deprecated. Use the new object interface instead.", DeprecationWarning)
+        # Dictionary to translate labels
+        # for backwards compatibility.
+        # Should be removed at some point.
+        backwards_translate = {
+            'filename': 'file_name',
+            'content-type': 'content_type',
+            }
+        if key == 'content':
+            return self.read()
+        return getattr(self, backwards_translate.get(key, key))
+    def __repr__(self):
+        """
+        This representation could be anything and can be overridden.
+        This is mostly done to make it look somewhat useful; the content
+        itself is deliberately left out.
+        """
+        _dict = {
+            'file_name': self.file_name,
+            'content_type': self.content_type,
+            'content': '<omitted>',
+            }
+        return repr(_dict)
+class TemporaryUploadedFile(UploadedFile):
+    """
+    An uploaded file whose contents are held in a temporary file.
+    """
+    def __init__(self, file, file_name, content_type, file_size, charset):
+        # ``file`` has to provide ``name`` in addition to read() and seek();
+        # the name is what temporary_file_path() reports.
+        self.file = file
+        self.file_name = file_name
+        self.path = file.name
+        self.charset = charset
+        self.file_size = file_size
+        self.content_type = content_type
+        # Hand the file over positioned at its first byte.
+        self.file.seek(0)
+    def temporary_file_path(self):
+        """
+        Return the ``name`` of the underlying file, captured at creation.
+        """
+        return self.path
+    def seek(self, *args, **kwargs):
+        # Forwarded verbatim; the wrapped file's return value is dropped.
+        self.file.seek(*args, **kwargs)
+    def read(self, *args, **kwargs):
+        # Forwarded verbatim to the wrapped file.
+        return self.file.read(*args, **kwargs)
+    def open(self):
+        """
+        Nothing needs opening, so just rewind to the start.
+        """
+        self.seek(0)
+class InMemoryUploadedFile(UploadedFile):
+    """
+    An uploaded file whose contents are held in an in-memory buffer.
+    """
+    def __init__(self, file, field_name, file_name, content_type, charset):
+        """
+        Wrap *file*, a seekable in-memory file-like object holding the
+        uploaded data, together with the metadata describing it.
+        """
+        self.file = file
+        self.field_name = field_name
+        self.file_name = file_name
+        self.content_type = content_type
+        self.charset = charset
+        # Measure the buffer so file_size is a real number rather than
+        # whatever the base class happens to expose.
+        self.file.seek(0, 2)
+        self.file_size = self.file.tell()
+        self.file.seek(0)
+    def seek(self, *args, **kwargs):
+        # Forwarded verbatim; the wrapped file's return value is dropped.
+        self.file.seek(*args, **kwargs)
+    def open(self):
+        """
+        Nothing needs opening, so just rewind to the start.
+        """
+        self.seek(0)
+    def read(self, *args, **kwargs):
+        # Forwarded verbatim to the wrapped file.
+        return self.file.read(*args, **kwargs)
+    def chunk(self, chunk_size=None):
+        """
+        Generate the entirety of the data as one single chunk, regardless
+        of chunk_size.
+
+        This is a generator, like UploadedFile.chunk(), so callers can
+        iterate over the chunks of any uploaded file the same way.
+        """
+        self.file.seek(0)
+        yield self.read()
+    def multiple_chunks(self, chunk_size=None):
+        """
+        Since it's in memory, we'll never have multiple chunks.
+        """
+        return False

django/http/multipartparser.py

+"""
+MultiPart parsing for file uploads.
+This object will take the file upload headers
+and the file upload handler and chunk the upload
+data for the handler to deal with.
+"""
+from django.utils.datastructures import MultiValueDict
+from django.utils.encoding import force_unicode
+__all__ = ('MultiPartParser','MultiPartParserError','InputStreamExhausted')
+class MultiPartParserError(Exception):
+    """ The multipart request could not be parsed. """
+class InputStreamExhausted(Exception):
+    """ No more reads are allowed from this device. """
+class MultiPartParser(object):
+    """
+    A rfc2388 multipart/form-data parser.
+
+    parse() reads the input stream in pieces of the upload handler's
+    chunk_size and returns a tuple of (POST MultiValueDict, FILES
+    MultiValueDict). File parts are streamed to the upload handler, which
+    decides how and where they are stored.
+    """
+    def __init__(self, META, input_data, upload_handlers, encoding=None):
+        """
+        Initialize the MultiPartParser object.
+        *META* -- The standard META dictionary in Django request objects.
+        *input_data* -- The raw post data, as a file-like object with a
+                        read() method.
+        *upload_handlers* -- A non-empty sequence of UploadHandler objects
+                             that perform operations on the uploaded
+                             data. For now only the first one is used.
+        *encoding* -- The encoding with which to treat the incoming data.
+                      Defaults to settings.DEFAULT_CHARSET.
+
+        Raises MultiPartParserError if the Content-Type is not multipart,
+        the boundary is missing or invalid, or the Content-Length is
+        missing, not an integer, or not positive.
+        """
+        # Import cgi utilities for (near) future use. They are bound as
+        # module globals so the import cost is only paid on first use.
+        global parse_header, valid_boundary, settings
+        from django.conf import settings
+        from cgi import valid_boundary, parse_header
+        #######
+        # Check basic headers
+        #######
+        #
+        # Content-Type should contain multipart and the boundary information.
+        ####
+        content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', ''))
+        if not content_type.startswith('multipart/'):
+            raise MultiPartParserError('Invalid Content-Type: %s' %
+                                       content_type)
+        # Parse the header to get the boundary to split the parts.
+        ctypes, opts = parse_header(content_type)
+        boundary = opts.get('boundary')
+        if not boundary or not valid_boundary(boundary):
+            raise MultiPartParserError('Invalid boundary in multipart: %s' %
+                                       boundary)
+        #
+        # Content-Length should contain the length of the body we are about
+        # to receive.
+        ####
+        try:
+            content_length = int(META.get('HTTP_CONTENT_LENGTH',
+                                          META.get('CONTENT_LENGTH',0)))
+        except (ValueError, TypeError):
+            # Unusable header: treated like a missing one just below.
+            content_length = 0
+        # If we have better knowledge of how much
+        # data is remaining in the request stream,
+        # we should use that. (modpython for instance)
+        #try:
+        #    remaining = input_data.remaining
+        #    if remaining is not None and \
+        #            (content_length is None or remaining < content_length):
+        #        content_length = remaining
+        #except AttributeError:
+        #    pass
+        if content_length <= 0:
+            # This means we shouldn't continue...raise an error.
+            raise MultiPartParserError("Invalid content length: %r" % content_length)
+        # For now, just use the first upload handler:
+        upload_handler = upload_handlers[0]
+        self._boundary = boundary
+        self._input_data = input_data
+        # For compatibility with low-level network APIs,
+        # the chunk size should be <= 2^31 - 1:
+        self._chunk_size = min(upload_handler.chunk_size, 2147483647)
+        self._post = MultiValueDict()
+        self._files = MultiValueDict()
+        self._encoding = encoding or settings.DEFAULT_CHARSET
+        self._content_length = content_length
+        self._upload_handler = upload_handler
+    def parse(self):
+        """
+        Parse the POST data and break it into a FILES MultiValueDict
+        and a POST MultiValueDict.
+           *returns* -- A tuple containing the POST and FILES dictionary,
+                        respectively.
+
+        Raises MultiPartParserError if a base64-encoded file part cannot
+        be decoded.
+        """
+        from base64 import b64decode
+        from django.http.fileuploadhandler import StopUpload, SkipFile
+        encoding = self._encoding
+        handler = self._upload_handler
+        limited_input_data = LimitBytes(self._input_data, self._content_length)
+        # Instantiate the parser and stream:
+        stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size))
+        for item_type, meta_data, field_stream in Parser(stream, self._boundary):
+            try:
+                disposition = meta_data['content-disposition'][1]
+                field_name = disposition['name'].strip()
+            except (KeyError, IndexError, AttributeError):
+                # A part without a usable name cannot be stored anywhere.
+                # Drain it so its body is not mistaken for the next part.
+                field_stream.exhaust()
+                continue
+            transfer_encoding = meta_data.get('content-transfer-encoding')
+            if item_type == 'FIELD':
+                # This is a post field, we can just set it in the post
+                data = field_stream.read()
+                if transfer_encoding == 'base64':
+                    try:
+                        data = b64decode(data)
+                    except TypeError:
+                        # Not valid base64 after all: keep the raw data.
+                        pass
+                self._post.appendlist(force_unicode(field_name, encoding, errors='replace'),
+                                      force_unicode(data, encoding, errors='replace'))
+            elif item_type == 'FILE':
+                # This is a file, use the handler...
+                file_name = self.IE_sanitize(disposition.get('filename'))
+                if not file_name:
+                    # No file was actually selected for this input.
+                    field_stream.exhaust()
+                    continue
+                file_name = force_unicode(file_name, encoding, errors='replace')
+                content_type = meta_data.get('content-type', ('',))[0].strip()
+                try:
+                    charset = meta_data.get('content-type', (0,{}))[1].get('charset', None)
+                except (IndexError, AttributeError, TypeError):
+                    charset = None
+                try:
+                    content_length = int(meta_data.get('content-length')[0])
+                except (IndexError, TypeError, ValueError):
+                    content_length = None
+                counter = 0
+                try:
+                    handler.new_file(field_name, file_name,
+                                     content_type, content_length,
+                                     charset)
+                    for chunk in field_stream:
+                        if transfer_encoding == 'base64':
+                            # We only special-case base64 transfer encoding
+                            # NOTE(review): each chunk is decoded on its own,
+                            # which presumably only works when chunk lengths
+                            # are multiples of four -- confirm.
+                            try:
+                                chunk = b64decode(chunk)
+                            except TypeError, e:
+                                raise MultiPartParserError("Could not decode base64 data: %r" % e)
+                        chunk_length = len(chunk)
+                        counter += chunk_length
+                        handler.receive_data_chunk(chunk,
+                                                   counter - chunk_length,
+                                                   counter)
+                except SkipFile:
+                    # Just use up the rest of this file...
+                    field_stream.exhaust()
+                except StopUpload:
+                    # Abort the parsing; whatever is left of the request
+                    # body is drained after the loop.
+                    break
+                else:
+                    # Only do this if the handler didn't raise an abort error
+                    file_obj = handler.file_complete(counter)
+                    if file_obj:
+                        # If it returns a file object, then set the files dict.
+                        self._files.appendlist(force_unicode(field_name,
+                                                             encoding,
+                                                             errors='replace'),
+                                               file_obj)
+            else:
+                # A part without parsable headers: skip over its data.
+                field_stream.exhaust()
+        # Make sure that the request data is all fed
+        limited_input_data.exhaust()
+        # Signal that the upload has completed.
+        handler.upload_complete()
+        return self._post, self._files
+    def IE_sanitize(self, filename):
+        """
+        Cleanup filename from IE full paths: keep only what follows the
+        last backslash, stripped of surrounding whitespace. Falsy values
+        are returned unchanged.
+        """
+        return filename and filename[filename.rfind("\\")+1:].strip()
+class LazyStream(object):
+    """
+    A file-like wrapper around a producer of string chunks.
+
+    It offers read() and iteration on top of the producer, lets callers
+    push data back with unget(), and tracks the current position, which
+    tell() reports.
+    """
+    def __init__(self, producer, length=None):
+        """
+        Every LazyStream must have a producer when instantiated.
+        A producer is an iterator whose next() returns a string each
+        time it is called.
+
+        *length*, when given, is how many bytes a read() without an
+        explicit size returns.
+        """
+        self._producer = producer
+        self._empty = False
+        self._leftover = ''
+        self.length = length
+        self.position = 0
+        self._remaining = length
+    def tell(self):
+        """
+        Return the number of bytes handed out so far, not counting bytes
+        that were put back with unget().
+        """
+        return self.position
+    def read(self, size=None):
+        """
+        Return up to *size* bytes, or, when size is None, *length* bytes
+        if a length was given and the whole stream otherwise.
+        """
+        def parts():
+            remaining = (size is not None and [size] or [self._remaining])[0]
+            # do the whole thing in one shot if no limit was provided.
+            if remaining is None:
+                yield ''.join(self)
+                return
+            # otherwise do some bookkeeping to return exactly enough
+            # of the stream and stashing any extra content we get from
+            # the producer
+            while remaining != 0:
+                assert remaining > 0, 'remaining bytes to read should never go negative'
+                chunk = self.next()
+                emitting = chunk[:remaining]
+                self.unget(chunk[remaining:])
+                remaining -= len(emitting)
+                yield emitting
+        # next() and unget() already keep self.position up to date, so it
+        # must not be advanced again here.
+        return ''.join(parts())
+    def next(self):
+        """
+        Used when the exact number of bytes to read is unimportant.
+        This procedure just returns whatever chunk is conveniently
+        returned from the iterator instead. Useful to avoid
+        unnecessary bookkeeping if performance is an issue.
+        """
+        if self._leftover:
+            output = self._leftover
+            self._leftover = ''
+        else:
+            output = self._producer.next()
+        self.position += len(output)
+        return output
+    def close(self):
+        """
+        Used to invalidate/disable this lazy stream.
+        Replaces the producer with an empty iterator. Any leftover bytes
+        that have already been read will still be reported upon read()
+        and/or next().
+        """
+        # An iterator rather than a bare list, because next() calls the
+        # producer's next() method.
+        self._producer = iter([])
+    def __iter__(self):
+        return self
+    def unget(self, bytes):
+        """
+        Places bytes back onto the front of the lazy stream.
+        Future calls to read() will return those bytes first. The
+        stream position and thus tell() will be rewound.
+        """
+        self.position -= len(bytes)
+        self._leftover = ''.join([bytes, self._leftover])
+    def exhaust(self):
+        """
+        Exhausts the entire underlying stream.
+        Useful for skipping and advancing sections.
+        """
+        for thing in self:
+            pass
+class ChunkIter(object):
+    """
+    An iterator that reads a file-like object chunk_size bytes at a time.
+    """
+    def __init__(self, flo, chunk_size=1024**2):
+        self.flo = flo
+        self.chunk_size = chunk_size
+    def __iter__(self):
+        return self
+    def next(self):
+        # Iteration stops either when the source refuses further reads or
+        # when it returns no data.
+        try:
+            piece = self.flo.read(self.chunk_size)
+        except InputStreamExhausted:
+            piece = None
+        if not piece:
+            raise StopIteration
+        return piece
+class LimitBytes(object):
+    """ Limit bytes for a file object. """
+    def __init__(self, fileobject, length):
+        """
+        *fileobject* -- The file-like object to read from.
+        *length* -- The total number of bytes that may be read from it.
+        """
+        self._file = fileobject
+        self.remaining = length
+    def read(self, num_bytes=None):
+        """
+        Read data from the underlying file.
+
+        At most num_bytes are read, and never more than what is left of
+        the limit; when num_bytes is None everything left is read.
+        If there isn't anything left, this will raise an
+        InputStreamExhausted error.
+        """
+        if self.remaining <= 0:
+            raise InputStreamExhausted()
+        if num_bytes is None:
+            # "Read it all" means all that the limit still allows.
+            num_bytes = self.remaining
+        else:
+            num_bytes = min(num_bytes, self.remaining)
+        self.remaining -= num_bytes
+        return self._file.read(num_bytes)
+    def exhaust(self):
+        """
+        Exhaust this file until all of the bytes it was limited by
+        have been read.
+        """
+        while self.remaining > 0:
+            # Discard in modest pieces to keep memory use bounded.
+            num_bytes = min(self.remaining, 16384)
+            self._file.read(num_bytes)
+            self.remaining -= num_bytes
+class InterBoundaryIter(object):
+    """
+    A Producer that yields one LazyStream per boundary-delimited section
+    of the wrapped stream.
+    """
+    def __init__(self, stream, boundary):
+        self._boundary = boundary
+        self._stream = stream
+    def __iter__(self):
+        return self
+    def next(self):
+        # BoundaryIter signals an empty stream with InputStreamExhausted;
+        # for this iterator that simply means there are no more sections.
+        try:
+            section = BoundaryIter(self._stream, self._boundary)
+        except InputStreamExhausted:
+            raise StopIteration
+        return LazyStream(section)
+class BoundaryIter(object):
+    """
+    A Producer that is sensitive to boundaries.
+    Will happily yield bytes until a boundary is found. Will yield the
+    bytes before the boundary, throw away the boundary bytes
+    themselves, and push the post-boundary bytes back on the stream.
+    The future calls to .next() after locating the boundary will raise
+    a StopIteration exception.
+    """
+    def __init__(self, stream, boundary):
+        """
+        *stream* -- The stream to read from; it must support read(),
+                    unget() and iteration.
+        *boundary* -- The boundary string to look for.
+
+        Raises InputStreamExhausted if the stream has no data left.
+        """
+        self._stream = stream
+        self._boundary = boundary
+        self._done = False
+        # rollback an additional six bytes because the format is like
+        # this: CRLF<boundary>[--CRLF]
+        self._rollback = len(boundary) + 6
+        # Peek at one byte to find out whether the stream still holds
+        # data, then put it back untouched.
+        unused_char = self._stream.read(1)
+        if not unused_char:
+            raise InputStreamExhausted
+        self._stream.unget(unused_char)
+        # Try to use mx fast string search if available. Otherwise
+        # use Python find. Wrap the latter for consistency.
+        try:
+            from mx.TextTools import FS
+            self._fs = FS(boundary).find
+        except ImportError:
+            self._fs = lambda data: data.find(boundary)
+    def __iter__(self):
+        return self
+    def next(self):
+        """
+        Return the next piece of data that precedes the boundary.
+
+        Raises StopIteration once the boundary has been found or the
+        stream has run out of data.
+        """
+        if self._done:
+            raise StopIteration
+        stream = self._stream
+        rollback = self._rollback
+        bytes_read = 0
+        chunks = []
+        # Gather chunks until more than ``rollback`` bytes are in hand.
+        for bytes in stream:
+            bytes_read += len(bytes)
+            chunks.append(bytes)
+            if bytes_read > rollback:
+                break
+            if not bytes:
+                break
+        else:
+            # The stream ran dry without either break: this is the last
+            # piece this iterator will produce.
+            self._done = True
+        if not chunks:
+            raise StopIteration
+        chunk = ''.join(chunks)
+        boundary = self._find_boundary(chunk, len(chunk) < self._rollback)
+        if boundary:
+            # Found it: hand out what precedes the boundary and give what
+            # follows it back to the stream.
+            end, next = boundary
+            stream.unget(chunk[next:])
+            self._done = True
+            return chunk[:end]
+        else:
+            # make sure we dont treat a partial boundary (and
+            # its separators) as data
+            if not chunk[:-rollback]:# and len(chunk) >= (len(self._boundary) + 6):
+                # There's nothing left, we should just return and mark as done.
+                self._done = True
+                return chunk
+            else:
+                # Hold back the last ``rollback`` bytes by ungetting them
+                # and return only what comes before.
+                stream.unget(chunk[-rollback:])
+                return chunk[:-rollback]
+    def _find_boundary(self, data, eof = False):
+        """
+        Finds a multipart boundary in data.
+        Should no boundary exist in the data None is returned
+        instead. Otherwise a tuple containing
+        the indices of the following are returned:
+         * the end of current encapsulation
+         * the start of the next encapsulation
+
+        The *eof* argument is accepted but not used.
+        """
+        index = self._fs(data)
+        if index < 0:
+            return None
+        else:
+            end = index
+            next = index + len(self._boundary)
+            # Index of the last byte; the min()/max() calls below clamp
+            # the lookups to the bounds of data.
+            data_len = len(data) - 1
+            # backup over CRLF
+            if data[max(0,end-1)] == '\n': end -= 1
+            if data[max(0,end-1)] == '\r': end -= 1
+            # skip over --CRLF
+            if data[min(data_len,next)] == '-': next += 1
+            if data[min(data_len,next)] == '-': next += 1
+            if data[min(data_len,next)] == '\r': next += 1
+            if data[min(data_len,next)] == '\n': next += 1
+            return end, next
+def ParseBoundaryStream(stream, max_header_size):
+    """
+    Parses one and exactly one stream that encapsulates a boundary.
+
+    *stream* -- A stream supporting read() and unget(), positioned at
+                the start of a part's headers.
+    *max_header_size* -- How many bytes to read while looking for the
+                         end of the headers.
+
+    Returns a tuple (type, headers, stream) where type is 'FILE',
+    'FIELD' or 'RAW' ('RAW' when no header block was found) and headers
+    maps each lower-cased header name to a (value, parameters) pair.
+    The stream is left positioned at the part's data.
+    """
+    # Imported here so the module does not need cgi until a part is parsed.
+    from cgi import parse_header
+    # Stream at beginning of header, look for end of header
+    # and parse it if found. The header must fit within one
+    # chunk.
+    chunk = stream.read(max_header_size)
+    # 'find' returns the top of these four bytes, so we'll
+    # need to munch them later to prevent them from polluting
+    # the payload.
+    header_end = chunk.find('\r\n\r\n')
+    if header_end == -1:
+        # we find no header, so we just mark this fact and pass on
+        # the stream verbatim
+        stream.unget(chunk)
+        return ('RAW', {}, stream)
+    header = chunk[:header_end]
+    # here we place any excess chunk back onto the stream, as
+    # well as throwing away the CRLFCRLF bytes from above.
+    stream.unget(chunk[header_end + 4:])
+    is_file_field = False
+    outdict = {}
+    for line in header.split('\r\n'):
+        # This terminology ("main value" and "dictionary of
+        # parameters") is from the Python docs.
+        main_value_pair, params = parse_header(line)
+        if ':' not in main_value_pair:
+            # eliminate blank (or otherwise unusable) lines
+            continue
+        name, value = main_value_pair.split(':', 1)
+        # Header names are case-insensitive; normalise them here rather
+        # than rely on what the cgi module does with the main value.
+        name = name.strip().lower()
+        if name == 'content-disposition' and params.get('filename'):
+            is_file_field = True
+        outdict[name] = value, params
+    if is_file_field:
+        return ('FILE', outdict, stream)
+    return ('FIELD', outdict, stream)
+class Parser(object):
+    """
+    Iterates over the parts of a multipart stream, yielding the
+    ParseBoundaryStream() result for each of them.
+    """
+    def __init__(self, stream, boundary):
+        self._stream = stream
+        # The separator searched for is the boundary prefixed with '--'.
+        self._separator = '--' + boundary
+    def __iter__(self):
+        # Iterate over each part; at most 1024 bytes are read when looking
+        # for a part's headers.
+        for sub_stream in InterBoundaryIter(self._stream, self._separator):
+            yield ParseBoundaryStream(sub_stream, 1024)

django/http/init.py

 from django.utils.datastructures import MultiValueDict, FileDict
 from django.utils.encoding import smart_str, iri_to_uri, force_unicode
+from django.http.multipartparser import MultiPartParser
 from utils import *
 RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
 …
         self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {}
         self.path = ''
         self.method = None
+        self.upload_handlers = []
     def __repr__(self):
         return '<HttpRequest\nGET:%s,\nPOST:%s,\nCOOKIES:%s,\nMETA:%s>' % \
 …
     encoding = property(_get_encoding, _set_encoding)
+def parse_file_upload(header_dict, post_data):
+    """Returns a tuple of (POST QueryDict, FILES MultiValueDict)."""
+    import email, email.Message
+    from cgi import parse_header
+    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
+    raw_message += '\r\n\r\n' + post_data
+    msg = email.message_from_string(raw_message)
+    POST = QueryDict('', mutable=True)
+    FILES = MultiValueDict()
+    for submessage in msg.get_payload():
+        if submessage and isinstance(submessage, email.Message.Message):
+            name_dict = parse_header(submessage['Content-Disposition'])[1]
+            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
+            # or {'name': 'blah'} for POST fields
+            # We assume all uploaded files have a 'filename' set.
+            if 'filename' in name_dict:
+                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
+                if not name_dict['filename'].strip():
+                    continue
+                # IE submits the full path, so trim everything but the basename.
+                # (We can't use os.path.basename because that uses the server's
+                # directory separator, which may not be the same as the
+                # client's one.)
+                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
+                FILES.appendlist(name_dict['name'], FileDict({
+                    'filename': filename,
+                    'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
+                    'content': submessage.get_payload(),
+                }))
+    def set_upload_handler(self, upload_handler):
+        """
+        Set the upload handler to the new handler given in the parameter.
+
+        The handler replaces the whole upload_handlers list. Raises
+        AttributeError if this object already has a ``_files`` attribute,
+        which is taken to mean the upload has been processed.
+        """
+        if hasattr(self, '_files'):
+            raise AttributeError("You cannot set the upload handler after the upload has been processed.")
+        self.upload_handlers = [upload_handler]
+    def parse_file_upload(self, META, post_data):
+        """Returns a tuple of (POST QueryDict, FILES MultiValueDict)."""
+        from django.http.fileuploadhandler import TemporaryFileUploadHandler, MemoryFileUploadHandler
+        if not self.upload_handlers:
+            try:
+                content_length = int(META.get('HTTP_CONTENT_LENGTH',
+                                              META.get('CONTENT_LENGTH',0)))
+            except (ValueError, TypeError):
+                content_length = 0
+            if content_length and content_length > 1048576:
+                # If the header is big enough, use temporary files.
+                self.upload_handlers = [TemporaryFileUploadHandler()]
             else:
                 POST.appendlist(name_dict['name'], submessage.get_payload())
     return POST, FILES
+                self.upload_handlers = [TemporaryFileUploadHandler()]
+                #self.upload_handlers = [MemoryFileUploadHandler()]
+        parser = MultiPartParser(META, post_data, self.upload_handlers,
+                                 self.encoding)
+        return parser.parse()
 class QueryDict(MultiValueDict):
     """
     A specialized MultiValueDict that takes a query string when initialized.

django/http/fileuploadhandler.py

+""" A fileuploadhandler base and default subclass for handling file uploads.
+"""
+import os
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+from django.http.uploadedfile import TemporaryUploadedFile, InMemoryUploadedFile
+__all__ = ('UploadFileException','StopUpload', 'SkipFile',
+           'FileUploadHandler', 'TemporaryFileUploadHandler',
+           'MemoryFileUploadHandler')
+class UploadFileException(Exception):
+    """ Any error having to do with Uploading Files. """
+class StopUpload(UploadFileException):
+    """ This exception is raised when an upload must abort. """
+class SkipFile(UploadFileException):
+    """ This exception is raised when a file needs to be skipped. """
+class FileUploadHandler(object):
+    """ FileUploadHandler will take data and handle file uploads
+    in a streamed fashion.
+
+    Subclasses must implement receive_data_chunk() and file_complete().
+    """
+    chunk_size = 64 * 2 ** 10 #: The default chunk size is 64 KB.
+    def __init__(self):
+        " Initialize some local variables. "
+        # field_name is initialised along with the others so that it can
+        # be read before the first call to new_file().
+        self.field_name = None
+        self.file_name = None
+        self.content_type = None
+        self.content_length = None
+        self.charset = None
+    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
+        """
+        Signal that a new file has been started.
+
+        The arguments describing the file are stored on the handler.
+        Warning: Do not trust content_length, if you get it at all.
+        """
+        self.field_name = field_name
+        self.file_name = file_name
+        self.content_type = content_type
+        self.content_length = content_length
+        self.charset = charset
+    def receive_data_chunk(self, raw_data, start, stop):
+        """
+        Receive data from the streamed upload parser.
+        Start and stop are the positions in the file.
+        This equality should always be true::
+            len(raw_data) == stop - start
+        """
+        raise NotImplementedError()
+    def file_complete(self, file_size):
+        """
+        Signal that a file has completed.
+        File size corresponds to the actual size accumulated
+        by all the chunks.
+        This should return a valid UploadedFile object.
+        """
+        raise NotImplementedError()
+    def upload_complete(self):
+        """
+        Signal that the upload is complete.
+        Do any cleanup that is necessary for this handler.
+        """
+        pass
+class TemporaryFileUploadHandler(FileUploadHandler):
+    """
+    An upload handler that writes the incoming data to a temporary file.
+    """
+    def __init__(self, *args, **kwargs):
+        """ Import settings for later. """
+        super(TemporaryFileUploadHandler, self).__init__(*args, **kwargs)
+        # Bound as a module global so new_file() can reach it.
+        global settings
+        from django.conf import settings
+    def new_file(self, file_name, *args, **kwargs):
+        """
+        Record the file's details and create the temporary file that the
+        incoming data will be written to.
+
+        All arguments are passed on unchanged to the parent's new_file().
+        """
+        super(TemporaryFileUploadHandler, self).new_file(file_name, *args, **kwargs)
+        temp_file = TemporaryFile(settings.FILE_UPLOAD_DIR)
+        self.file = temp_file
+        self.write = temp_file.write
+    def receive_data_chunk(self, raw_data, start, stop):
+        """
+        Write the received data to the temporary file.
+        """
+        self.write(raw_data)
+    def file_complete(self, file_size):
+        """
+        Signal that a file has completed.
+        file_size is the actual size accumulated by all the chunks.
+        Returns a TemporaryUploadedFile wrapping the rewound file.
+        """
+        upload = self.file
+        upload.seek(0)
+        return TemporaryUploadedFile(upload, self.file_name,
+                                     self.content_type, file_size,
+                                     self.charset)
+class TemporaryFile(object):
+    """
+    A temporary file that tries to delete itself when garbage collected.
+
+    Attributes that are not found on this object are looked up on the
+    underlying file object.
+    """
+    def __init__(self, dir):
+        """
+        Create a file with the suffix '.upload' in *dir*, or in the
+        default temporary directory if dir is empty.
+        """
+        import tempfile
+        if not dir:
+            dir = tempfile.gettempdir()
+        (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
+        self.file = os.fdopen(fd, 'w+b')
+        self.name = name
+    def __getattr__(self, name):
+        try:
+            wrapped = self.__dict__['file']
+        except KeyError:
+            # __init__ did not get as far as opening the file.
+            raise AttributeError(name)
+        a = getattr(wrapped, name)
+        # Cache methods and the like for speed, but not integers (this
+        # includes booleans such as ``closed``), whose values may change.
+        if not isinstance(a, int):
+            setattr(self, name, a)
+        return a
+    def __del__(self):
+        # Close first: some platforms refuse to remove a file that is
+        # still open.
+        wrapped = self.__dict__.get('file')
+        if wrapped is not None:
+            try:
+                wrapped.close()
+            except (IOError, OSError, ValueError):
+                pass
+        path = self.__dict__.get('name')
+        if path is not None:
+            try:
+                os.unlink(path)
+            except OSError:
+                pass
+class MemoryFileUploadHandler(FileUploadHandler):
+    """
+    The MemoryFileUploadHandler will place the data directly into memory.
+    """
+    chunk_size = 32 * 2 ** 40 #: Make the chunk size huge
+    def __init__(self):
+        " Initialize some local variables. "
+        self.file_name = None
+        self.content_type = None
+        self.content_length = None
+    def new_file(self, field_name, file_name, content_type, content_length, charset):
+        """
+        Set the local data variable to empty.
+        """
+        self.field_name = field_name
+        self.file_name = file_name
+        self.content_type = content_type
+        self.content_length = content_length
+        self.charset = charset
+        self.file = StringIO()
+    def receive_data_chunk(self, raw_data, start, stop):
+        """
+        Append the data to the local data variable.
+        """
+        self.file.write(raw_data)
+    def file_complete(self, file_size):
+        """
+        Return an in memory file object.
+        """
+        current_file = self.file
+        self.file = None #: Decrement the ref count
+        return InMemoryUploadedFile(current_file, self.field_name,
+                                    self.file_name, self.content_type,
+                                    self.charset)

django/conf/global_settings.py

 # Example: "http://media.lawrence.com"
 MEDIA_URL = ''
+# Directory in which streamed file uploads are stored temporarily while
+# they are being received.
+# A value of `None` means that it will use the default temporary
+# directory for the server's operating system.
+FILE_UPLOAD_DIR = None
 # Default formatting for date objects. See all available format strings here:
 # http://www.djangoproject.com/documentation/templates/#now
 DATE_FORMAT = 'N j, Y'

django/core/handlers/wsgi.py

         self.path = force_unicode(environ['PATH_INFO'])
         self.META = environ
         self.method = environ['REQUEST_METHOD'].upper()
+        self.upload_handlers = []
     def __repr__(self):
         # Since this is called as part of error handling, we need to be very
 …
         # Populates self._post and self._files
         if self.method == 'POST':
             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
-                header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
-                header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
+                self._raw_post_data = ''
+                self._post, self._files = self.parse_file_upload(self.META, self.environ['wsgi.input'])
             else:
                 self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
         else:

django/core/handlers/modpython.py

     def __init__(self, req):
         self._req = req
         self.path = force_unicode(req.uri)
+        self.upload_handlers = []
     def __repr__(self):
         # Since this is called as part of error handling, we need to be very
 …
     def _load_post_and_files(self):
         "Populates self._post and self._files"
         if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
+            self._raw_post_data = ''
+            self._post, self._files = self.parse_file_upload(self.META, self._req)
         else:
             self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()

django/core/files/filemove.py

+import os
+__all__ = ('file_move_safe',)
+try:
+    import shutil
+    file_move = shutil.move
+except ImportError:
+    file_move = os.rename
+def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64, allow_overwrite=False):
+    """
+    Moves a file from one location to another in the safest way possible.
+    First, it tries using shutils.move, which is OS-dependent but doesn't
+    break with change of filesystems. Then it tries os.rename, which will
+    break if it encounters a change in filesystems. Lastly, it streams
+    it manually from one file to another in python.
+    Without ``allow_overwrite``, if the destination file exists, the
+    file will raise an IOError.
+    """
+    from django.core.files import filelocks
+    if old_file_name == new_file_name:
+        # No file moving takes place.
+        return
+    if not allow_overwrite and os.path.exists(new_file_name):
+        raise IOError, "Django does not allow overwriting files."
+    try:
+        file_move(old_file_name, new_file_name)
+        return
+    except OSError: # moving to another filesystem
+        pass
+    new_file = open(new_file_name, 'wb')
+    # exclusive lock
+    filelocks.lock(new_file, filelocks.LOCK_EX)
+    old_file = open(old_file_name, 'rb')
+    current_chunk = None
+    while current_chunk != '':
+        current_chunk = old_file.read(chunk_size)
+        new_file.write(current_chunk)
+    new_file.close()
+    old_file.close()
+    os.remove(old_file_name)

Download in other formats:

Original Format

Issues

Context Navigation

Ticket #2070: 2070_revision7339_uploadhandling.diff

django/http/uploadedfile.py

django/http/multipartparser.py

django/http/__init__.py

django/http/fileuploadhandler.py

django/conf/global_settings.py

django/core/handlers/wsgi.py

django/core/handlers/modpython.py

django/core/files/filemove.py

Download in other formats:

Django Links

Learn More

Get Involved

Get Help

Follow Us

Support Us