Ticket #2070: 2070_revision7339_uploadhandling.diff

File 2070_revision7339_uploadhandling.diff, 38.8 KB (added by Michael Axiak, 16 years ago)

NEW Upload handling for revision 7339

  • django/http/uploadedfile.py

     
     1"""
     2The uploaded file objects for Django.
     3This contains the base UploadedFile and the TemporaryUploadedFile
     4derived class.
     5"""
     6
     7__all__ = ('UploadedFile', 'TemporaryUploadedFile', 'InMemoryUploadedFile')
     8
     9class UploadedFile(object):
     10    """
     11    The UploadedFile object behaves somewhat like a file
     12    object and represents some data that the user submitted
     13    and is stored in some form.
     14    """
     15    DEFAULT_CHUNK_SIZE = 64 * 2**10
     16
     17    def __init__(self):
     18        self.file_size = None
     19        self.file_name = None
     20        self.content_type = None
     21        self.charset = None
     22        pass
     23
     24    def file_size(self):
     25        return self.file_size
     26
     27    def chunk(chunk_size=None):
     28        """
     29        Read the file to generate chunks of chunk_size bytes.
     30        """
     31        if not chunk_size:
     32            chunk_size = UploadedFile.DEFAULT_CHUNK_SIZE
     33
     34        if hasattr(self, 'seek'):
     35            self.seek(0)
     36        # Assume the pointer is at zero...
     37        counter = self.file_size()
     38
     39        while counter > 0:
     40            yield self.read(chunk_size)
     41            counter -= chunk_size
     42
     43
     44    def multiple_chunks(self, chunk_size=None):
     45        """
     46        Return True if you can expect multiple chunks, False otherwise.
     47        Note: If a particular file representation is in memory, then
     48              override this to return False.
     49        """
     50        if not chunk_size:
     51            chunk_size = UploadedFile.DEFAULT_CHUNK_SIZE
     52        return self.file_size() < chunk_size
     53       
     54
     55    def read(self, num_bytes=None):
     56        """
     57        Read from the file in whatever representation it has.
     58        """
     59        raise NotImplementedError()
     60
     61    def open(self):
     62        """
     63        Open the file, if one needs to.
     64        """
     65        pass
     66
     67
     68    def close(self):
     69        """
     70        Close the file, if one needs to.
     71        """
     72        pass
     73
     74    def __getitem__(self, key):
     75        """
     76        This maintains backwards compatibility.
     77        """
     78        import warnings
     79        warnings.warn("The dictionary access of uploaded file objects is deprecated. Use the new object interface instead.", DeprecationWarning)
     80        # Dictionary to translate labels
     81        # for backwards compatbility.
     82        # Should be removed at some point.
     83        backwards_translate = {
     84            'filename': 'file_name',
     85            'content-type': 'content_type',
     86            }
     87
     88        if key == 'content':
     89            return self.read()
     90        else:
     91            return getattr(self, backwards_translate.get(key, key))
     92
     93    def __repr__(self):
     94        """
     95        This representation could be anything and can be overridden.
     96        This is mostly done to make it look somewhat useful.
     97        """
     98        _dict = {
     99            'file_name': self.file_name,
     100            'content_type': self.content_type,
     101            'content': '<omitted>',
     102            }
     103        return repr(_dict)
     104
     105
     106class TemporaryUploadedFile(UploadedFile):
     107    """
     108    Upload a file to a temporary file.
     109    """
     110
     111    def __init__(self, file, file_name, content_type, file_size, charset):
     112        self.file = file
     113        self.file_name = file_name
     114        self.path = file.name
     115        self.content_type = content_type
     116        self.file_size = file_size
     117        self.charset = charset
     118        self.file.seek(0)
     119
     120    def temporary_file_path(self):
     121        """
     122        Return the full path of this file.
     123        """
     124        return self.path
     125
     126    def read(self, *args, **kwargs):
     127        return self.file.read(*args, **kwargs)
     128
     129    def open(self):
     130        """
     131        Assume the person meant to seek.
     132        """
     133        self.seek(0)
     134
     135    def seek(self, *args, **kwargs):
     136        self.file.seek(*args, **kwargs)
     137
     138
     139class InMemoryUploadedFile(UploadedFile):
     140    """
     141    Upload a file into memory.
     142    """
     143    def __init__(self, file, field_name, file_name, content_type, charset):
     144        self.file = file
     145        self.field_name = field_name
     146        self.file_name = file_name
     147        self.content_type = content_type
     148        self.charset = charset
     149        self.file.seek(0)
     150
     151    def seek(self, *args, **kwargs):
     152        self.file.seek(*args, **kwargs)
     153
     154    def open(self):
     155        self.seek(0)
     156
     157    def read(self, *args, **kwargs):
     158        return self.file.read(*args, **kwargs)
     159
     160    def chunk(chunk_size=None):
     161        """
     162        Return the entirety of the data regardless.
     163        """
     164        self.file.seek(0)
     165        return self.read()
     166
     167    def multiple_chunks(self, chunk_size=None):
     168        """
     169        Since it's in memory, we'll never have multiple chunks.
     170        """
     171        return False
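
To make the interface concrete, here is a minimal consumer sketch (the save_to_disk helper and its arguments are illustrative, not part of the patch):

    def save_to_disk(uploaded, destination_path):
        # `uploaded` is assumed to be a TemporaryUploadedFile or
        # InMemoryUploadedFile produced by the handlers in this patch.
        dest = open(destination_path, 'wb')
        try:
            if uploaded.multiple_chunks():
                # Stream large files chunk by chunk to bound memory use.
                for chunk in uploaded.chunk():
                    dest.write(chunk)
            else:
                # Small files can be read in one shot.
                dest.write(uploaded.read())
        finally:
            dest.close()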
  • django/http/multipartparser.py

     
     1"""
     2MultiPart parsing for file uploads.
     3
     4This object will take the file upload headers
     5and the file upload handler and chunk the upload
     6data for the handler to deal with.
     7"""
     8from django.utils.datastructures import MultiValueDict
     9from django.utils.encoding import force_unicode
     10
     11__all__ = ('MultiPartParser','MultiPartParserError','InputStreamExhausted')
     12
     13class MultiPartParserError(Exception):
     14    pass
     15
     16class InputStreamExhausted(Exception):
     17    """ No more reads are allowed from this device. """
     18    pass
     19
     20class MultiPartParser(object):
     21    """
     22    A rfc2388 multipart/form-data parser.
     23
     24    parse() reads the input stream in chunk_size chunks and returns a
     25    tuple of (POST MultiValueDict, FILES MultiValueDict). If
     26    file_upload_dir is defined files will be streamed to temporary
     27    files in the specified directory.
     28    """
     29    def __init__(self, META, input_data, upload_handlers, encoding=None):
     30        """
     31        Initialize the MultiPartParser object.
     32
     33        *META* -- The standard META dictionary in Django request objects.
     34        *input_data* -- The raw post data, as a bytestring.
     35        *upload_handler* -- An object of type UploadHandler
     36                            that performs operations on the uploaded
     37                            data.
     38        *encoding* -- The encoding with which to treat the incoming data.
     39        """
     40        # Import cgi utilities for (near) future use.
     41        global parse_header, valid_boundary, settings
     42        from django.conf import settings
     43        from cgi import valid_boundary, parse_header
     44
     45        #######
     46        # Check basic headers
     47        #######
     48
     49        #
     50        # Content-Type should containt multipart and the boundary information.
     51        ####
     52
     53        content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', ''))
     54        if not content_type.startswith('multipart/'):
     55            raise MultiPartParserError('Invalid Content-Type: %s' %
     56                                       content_type)
     57
     58        # Parse the header to get the boundary to split the parts.
     59        ctypes, opts = parse_header(content_type)
     60        boundary = opts.get('boundary')
     61        if not boundary or not valid_boundary(boundary):
     62            raise MultiPartParserError('Invalid boundary in multipart: %s' %
     63                                       boundary)
     64
     65
     66        #
     67        # Content-Length should contain the length of the body we are about
     68        # to receive.
     69        ####
     70        try:
     71            content_length = int(META.get('HTTP_CONTENT_LENGTH',
     72                                          META.get('CONTENT_LENGTH',0)))
     73        except (ValueError, TypeError):
     74            # For now set it to 0...we'll try again later on down.
     75            content_length = 0
     76
     77        # If we have better knowledge of how much
     78        # data is remaining in the request stream,
     79        # we should use that. (modpython for instance)
     80        #try:
     81        #    remaining = input_data.remaining
     82        #    if remaining is not None and \
     83        #            (content_length is None or remaining < content_length):
     84        #        content_length = remaining
     85        #except AttributeError:
     86        #    pass
     87
     88        if not content_length:
     89            # This means we shouldn't continue...raise an error.
     90            raise MultiPartParserError("Invalid content length: %r" % content_length)
     91
     92
     93        # For now, just use the first upload handler:
     94        upload_handler = upload_handlers[0]
     95
     96        self._boundary = boundary
     97        self._input_data = input_data
     98
     99        # For compatibility with low-level network APIs,
     100        # the chunk size should be <= 2^31 - 1:
     101        self._chunk_size = min(upload_handler.chunk_size, 2147483647)
     102
     103        self._post = MultiValueDict()
     104        self._files = MultiValueDict()
     105        self._encoding = encoding or settings.DEFAULT_CHARSET
     106        self._content_length = content_length
     107        self._upload_handler = upload_handler
     108
     109    def parse(self):
     110        """
     111        Parse the POST data and break it into a FILES MultiValueDict
     112        and a POST MultiValueDict.
     113
     114           *returns* -- A tuple containing the POST and FILES dictionary,
     115                        respectively.
     116        """
     117        from base64 import b64decode
     118        from django.http.fileuploadhandler import StopUpload, SkipFile
     119
     120        encoding = self._encoding
     121        handler = self._upload_handler
     122
     123        limited_input_data = LimitBytes(self._input_data, self._content_length)
     124
     125        # Instantiate the parser and stream:
     126        stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size))
     127        for item_type, meta_data, stream in Parser(stream, self._boundary):
     128            try:
     129                disposition = meta_data['content-disposition'][1]
     130                field_name = disposition['name'].strip()
     131            except (KeyError, IndexError, AttributeError):
     132                continue
     133
     134            transfer_encoding = meta_data.get('content-transfer-encoding')
     135
     136            if item_type == 'FIELD':
     137                # This is a post field, we can just set it in the post
     138                if transfer_encoding == 'base64':
     139                    raw_data = stream.read()
     140                    try:
     141                        data = b64decode(raw_data)
     142                    except TypeError:
     143                        data = raw_data
     144                else:
     145                    data = stream.read()
     146
     147                self._post.appendlist(force_unicode(field_name, encoding, errors='replace'),
     148                                      force_unicode(data, encoding, errors='replace'))
     149            elif item_type == 'FILE':
     150                # This is a file, use the handler...
     151                file_successful = True
     152                file_name = self.IE_sanitize(disposition.get('filename'))
     153                if not file_name:
     154                    continue
     155
     156                file_name = force_unicode(file_name, encoding, errors='replace')
     157
     158                content_type = meta_data.get('content-type', ('',))[0].strip()
     159                try:
     160                    charset = meta_data.get('content-type', (0,{}))[1].get('charset', None)
     161                except:
     162                    charset = None
     163
     164                try:
     165                    content_length = int(meta_data.get('content-length')[0])
     166                except (IndexError, TypeError, ValueError):
     167                    content_length = None
     168
     169                counter = 0
     170                try:
     171                    handler.new_file(field_name, file_name,
     172                                     content_type, content_length,
     173                                     charset)
     174                    for chunk in stream:
     175                        if transfer_encoding == 'base64':
     176                            # We only special-case base64 transfer encoding
     177                            try:
     178                                chunk = b64decode(chunk)
     179                            except TypeError, e:
     180                                raise MultiValueParseError("Could not decode base64 data: %r" % e)
     181
     182                        chunk_length = len(chunk)
     183                        counter += chunk_length
     184                        handler.receive_data_chunk(chunk,
     185                                                   counter - chunk_length,
     186                                                   counter)
     187                except (StopUpload, SkipFile), e:
     188                    file_successful = False
     189                    if isinstance(e, SkipFile):
     190                        # Just use up the rest of this file...
     191                        stream.exhaust()
     192                    elif isinstance(e, StopUpload):
     193                        # Abort the parsing and break
     194                        parser.abort()
     195                        break
     196                else:
     197                    # Only do this if the handler didn't raise an abort error
     198                    file_obj = handler.file_complete(counter)
     199                    if file_obj:
     200                        # If it returns a file object, then set the files dict.
     201                        self._files.appendlist(force_unicode(field_name,
     202                                                             encoding,
     203                                                             errors='replace'),
     204                                               file_obj)
     205            else:
     206                stream.exhuast()
     207
     208        # Make sure that the request data is all fed
     209        limited_input_data.exhaust()
     210
     211        # Signal that the upload has completed.
     212        handler.upload_complete()
     213
     214        return self._post, self._files
     215
     216    def IE_sanitize(self, filename):
     217        """cleanup filename from IE full paths"""
     218        return filename and filename[filename.rfind("\\")+1:].strip()
     219
     220
     221class LazyStream(object):
     222    def __init__(self, producer, length=None):
     223        """
     224        Every LazyStream must have a producer when instantiated.
     225
     226        A producer is an iterable that returns a string each time it
     227        is called.
     228        """
     229        self._producer = producer
     230        self._empty = False
     231        self._leftover = ''
     232        self.length = length
     233        self.position = 0
     234        self._remaining = length
     235
     236    def tell(self):
     237        return self.position
     238
     239    def read(self, size=None):
     240        def parts():
     241            remaining = (size is not None and [size] or [self._remaining])[0]
     242            # do the whole thing in one shot if no limit was provided.
     243            if remaining is None:
     244                yield ''.join(self)
     245                return
     246
     247            # otherwise do some bookkeeping to return exactly enough
     248            # of the stream and stashing any extra content we get from
     249            # the producer
     250            while remaining != 0:
     251                assert remaining > 0, 'remaining bytes to read should never go negative'
     252
     253                chunk = self.next()
     254
     255                emitting = chunk[:remaining]
     256                self.unget(chunk[remaining:])
     257                remaining -= len(emitting)
     258                yield emitting
     259
     260        out = ''.join(parts())
     261        self.position += len(out)
     262        return out
     263
     264    def next(self):
     265        """
     266        Used when the exact number of bytes to read is unimportant.
     267
     268        This procedure just returns whatever is chunk is conveniently
     269        returned from the iterator instead. Useful to avoid
     270        unnecessary bookkeeping if performance is an issue.
     271        """
     272        if self._leftover:
     273            output = self._leftover
     274            self.position += len(output)
     275            self._leftover = ''
     276            return output
     277        else:
     278            output = self._producer.next()
     279            self.position += len(output)
     280            return output
     281
     282    def close(self):
     283        """
     284        Used to invalidate/disable this lazy stream.
     285
     286        Replaces the producer with an empty list. Any leftover bytes
     287        that have already been read will still be reported upon read()
     288        and/or next().
     289        """
     290        self._producer = []
     291
     292    def __iter__(self):
     293        return self
     294
     295    def unget(self, bytes):
     296        """
     297        Places bytes back onto the front of the lazy stream.
     298
     299        Future calls to read() will return those bytes first. The
     300        stream position and thus tell() will be rewound.
     301        """
     302        self.position -= len(bytes)
     303        self._leftover = ''.join([bytes, self._leftover])
     304
     305    def exhaust(self):
     306        """
     307        Exhausts the entire underlying stream.
     308
     309        Useful for skipping and advancing sections.
     310        """
     311        for thing in self:
     312            pass
     313
     314
     315class ChunkIter(object):
     316    def __init__(self, flo, chunk_size=1024**2):
     317        self.flo = flo
     318        self.chunk_size = chunk_size
     319
     320    def next(self):
     321        try:
     322            data = self.flo.read(self.chunk_size)
     323        except InputStreamExhausted:
     324            raise StopIteration
     325        if data:
     326            return data
     327        else:
     328            raise StopIteration
     329
     330    def __iter__(self):
     331        return self
     332
     333
     334class LimitBytes(object):
     335    """ Limit bytes for a file object. """
     336    def __init__(self, fileobject, length):
     337        self._file = fileobject
     338        self.remaining = length
     339
     340    def read(self, num_bytes=None):
     341        """
     342        Read data from the underlying file.
     343        If you ask for too much or there isn't anything left,
     344        this will raise an InputStreamExhausted error.
     345        """
     346        if self.remaining <= 0:
     347            raise InputStreamExhausted()
     348        num_bytes = min(num_bytes, self.remaining)
     349        self.remaining -= num_bytes
     350        return self._file.read(num_bytes)
     351
     352    def exhaust(self):
     353        """
     354        Exhaust this file until all of the bytes it was limited by
     355        have been read.
     356        """
     357        while self.remaining > 0:
     358            num_bytes = min(self.remaining, 16384)
     359            __ = self._file.read(num_bytes)
     360            self.remaining -= num_bytes
     361
     362
     363class InterBoundaryIter(object):
     364    """
     365    A Producer that will iterate over boundaries.
     366    """
     367    def __init__(self, stream, boundary):
     368        self._stream = stream
     369        self._boundary = boundary
     370
     371    def __iter__(self):
     372        return self
     373
     374    def next(self):
     375        try:
     376            return LazyStream(BoundaryIter(self._stream, self._boundary))
     377        except InputStreamExhausted:
     378            raise StopIteration
     379
     380class BoundaryIter(object):
     381    """
     382    A Producer that is sensitive to boundaries.
     383
     384    Will happily yield bytes until a boundary is found. Will yield the
     385    bytes before the boundary, throw away the boundary bytes
     386    themselves, and push the post-boundary bytes back on the stream.
     387
     388    The future calls to .next() after locating the boundary will raise
     389    a StopIteration exception.
     390    """
     391    def __init__(self, stream, boundary):
     392        self._stream = stream
     393        self._boundary = boundary
     394        self._done = False
     395        # rollback an additional six bytes because the format is like
     396        # this: CRLF<boundary>[--CRLF]
     397        self._rollback = len(boundary) + 6
     398
     399        # Try to use mx fast string search if available. Otherwise
     400        # use Python find. Wrap the latter for consistency.
     401        unused_char = self._stream.read(1)
     402        if not unused_char:
     403            raise InputStreamExhausted
     404        self._stream.unget(unused_char)
     405        try:
     406            from mx.TextTools import FS
     407            self._fs = FS(boundary).find
     408        except ImportError:
     409            self._fs = lambda data: data.find(boundary)
     410
     411    def __iter__(self):
     412        return self
     413
     414    def next(self):
     415        if self._done:
     416            raise StopIteration
     417
     418        stream = self._stream
     419        rollback = self._rollback
     420
     421        bytes_read = 0
     422        chunks = []
     423        for bytes in stream:
     424            bytes_read += len(bytes)
     425            chunks.append(bytes)
     426            if bytes_read > rollback:
     427                break
     428            if not bytes:
     429                break
     430        else:
     431            self._done = True
     432
     433        if not chunks:
     434            raise StopIteration
     435
     436        chunk = ''.join(chunks)
     437
     438        boundary = self._find_boundary(chunk, len(chunk) < self._rollback)
     439
     440
     441        if boundary:
     442            end, next = boundary
     443            stream.unget(chunk[next:])
     444            self._done = True
     445            return chunk[:end]
     446        else:
     447            # make sure we dont treat a partial boundary (and
     448            # its separators) as data
     449            if not chunk[:-rollback]:# and len(chunk) >= (len(self._boundary) + 6):
     450                # There's nothing left, we should just return and mark as done.
     451                self._done = True
     452                return chunk
     453            else:
     454                stream.unget(chunk[-rollback:])
     455                return chunk[:-rollback]
     456
     457    def _find_boundary(self, data, eof = False):
     458        """
     459        Finds a multipart boundary in data.
     460
     461        Should no boundry exist in the data None is returned
     462        instead. Otherwise a tuple containing
     463        the indices of the following are returned:
     464
     465         * the end of current encapsulation
     466
     467         * the start of the next encapsulation
     468        """
     469        index = self._fs(data)
     470        if index < 0:
     471            return None
     472        else:
     473            end = index
     474            next = index + len(self._boundary)
     475            data_len = len(data) - 1
     476            # backup over CRLF
     477            if data[max(0,end-1)] == '\n': end -= 1
     478            if data[max(0,end-1)] == '\r': end -= 1
     479            # skip over --CRLF
     480            if data[min(data_len,next)] == '-': next += 1
     481            if data[min(data_len,next)] == '-': next += 1
     482            if data[min(data_len,next)] == '\r': next += 1
     483            if data[min(data_len,next)] == '\n': next += 1
     484            return end, next
     485
     486def ParseBoundaryStream(stream, max_header_size):
     487        """
     488        Parses one and exactly one stream that encapsulates a boundary.
     489        """
     490        # Stream at beginning of header, look for end of header
     491        # and parse it if found. The header must fit within one
     492        # chunk.
     493        chunk = stream.read(max_header_size)
     494        # 'find' returns the top of these four bytes, so we'll
     495        # need to munch them later to prevent them from polluting
     496        # the payload.
     497        header_end = chunk.find('\r\n\r\n')
     498
     499        def parse_header(line):
     500            from cgi import parse_header
     501            main_value_pair, params = parse_header(line)
     502            try:
     503                name, value = main_value_pair.split(':', 1)
     504            except:
     505                raise ValueError("Invalid header: %r" % line)
     506            return name, (value, params)
     507
     508        if header_end == -1:
     509            # we find no header, so we just mark this fact and pass on
     510            # the stream verbatim
     511            stream.unget(chunk)
     512            return ('RAW', {}, stream)
     513
     514        header = chunk[:header_end]
     515
     516        # here we place any excess chunk back onto the stream, as
     517        # well as throwing away the CRLFCRLF bytes from above.
     518        stream.unget(chunk[header_end + 4:])
     519
     520        is_file_field = False
     521        outdict = {}
     522
     523        # eliminate blank lines
     524        for line in header.split('\r\n'):
     525            # This terminology ("main value" and "dictionary of
     526            # parameters") is from the Python docs.
     527            name, (value, params) = parse_header(line)
     528            if name == 'content-disposition' and params.get('filename'):
     529                is_file_field = True
     530
     531            outdict[name] = value, params
     532
     533        if is_file_field:
     534            return ('FILE', outdict, stream)
     535        else:
     536            return ('FIELD', outdict, stream)
     537
     538
     539class Parser(object):
     540    def __init__(self, stream, boundary):
     541        self._stream = stream
     542        self._separator = '--' + boundary
     543
     544    def __iter__(self):
     545
     546        boundarystream = InterBoundaryIter(self._stream,
     547                                           self._separator)
     548
     549        for sub_stream in boundarystream:
     550            # Iterate over each part
     551            yield ParseBoundaryStream(sub_stream, 1024)
     552
     553
     554
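
As a rough usage sketch, the parser can be driven by hand with any file-like body. The boundary, field name, and encoding below are arbitrary, and MemoryFileUploadHandler comes from the django/http/fileuploadhandler.py module later in this diff:

    from StringIO import StringIO
    from django.http.multipartparser import MultiPartParser
    from django.http.fileuploadhandler import MemoryFileUploadHandler

    body = ('--abc\r\n'
            'Content-Disposition: form-data; name="f"; filename="hello.txt"\r\n'
            'Content-Type: text/plain\r\n'
            '\r\n'
            'hello world\r\n'
            '--abc--\r\n')
    meta = {'CONTENT_TYPE': 'multipart/form-data; boundary=abc',
            'CONTENT_LENGTH': str(len(body))}

    parser = MultiPartParser(meta, StringIO(body),
                             [MemoryFileUploadHandler()], encoding='utf-8')
    post, files = parser.parse()
    uploaded = files['f']        # an InMemoryUploadedFile
    print uploaded.file_name     # u'hello.txt'
    print uploaded.read()        # 'hello world'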
  • django/http/__init__.py

     
    @@ -11,7 +11,7 @@
     
     from django.utils.datastructures import MultiValueDict, FileDict
     from django.utils.encoding import smart_str, iri_to_uri, force_unicode
    -
    +from django.http.multipartparser import MultiPartParser
     from utils import *
     
     RESERVED_CHARS="!*'();:@&=+$,/?%#[]"

    @@ -30,6 +30,7 @@
             self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {}
             self.path = ''
             self.method = None
    +        self.upload_handlers = []
     
         def __repr__(self):
             return '<HttpRequest\nGET:%s,\nPOST:%s,\nCOOKIES:%s,\nMETA:%s>' % \

    @@ -102,40 +103,37 @@
     
         encoding = property(_get_encoding, _set_encoding)
     
    -def parse_file_upload(header_dict, post_data):
    -    """Returns a tuple of (POST QueryDict, FILES MultiValueDict)."""
    -    import email, email.Message
    -    from cgi import parse_header
    -    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
    -    raw_message += '\r\n\r\n' + post_data
    -    msg = email.message_from_string(raw_message)
    -    POST = QueryDict('', mutable=True)
    -    FILES = MultiValueDict()
    -    for submessage in msg.get_payload():
    -        if submessage and isinstance(submessage, email.Message.Message):
    -            name_dict = parse_header(submessage['Content-Disposition'])[1]
    -            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
    -            # or {'name': 'blah'} for POST fields
    -            # We assume all uploaded files have a 'filename' set.
    -            if 'filename' in name_dict:
    -                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
    -                if not name_dict['filename'].strip():
    -                    continue
    -                # IE submits the full path, so trim everything but the basename.
    -                # (We can't use os.path.basename because that uses the server's
    -                # directory separator, which may not be the same as the
    -                # client's one.)
    -                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
    -                FILES.appendlist(name_dict['name'], FileDict({
    -                    'filename': filename,
    -                    'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
    -                    'content': submessage.get_payload(),
    -                }))
    +    def set_upload_handler(self, upload_handler):
    +        """
    +        Set the upload handler to the new handler given in the parameter.
    +        """
    +        if hasattr(self, '_files'):
    +            raise AttributeError("You cannot set the upload handler after the upload has been processed.")
    +        self.upload_handlers = [upload_handler]
    +
    +    def parse_file_upload(self, META, post_data):
    +        """Returns a tuple of (POST QueryDict, FILES MultiValueDict)."""
    +        from django.http.fileuploadhandler import TemporaryFileUploadHandler, MemoryFileUploadHandler
    +
    +        if not self.upload_handlers:
    +            try:
    +                content_length = int(META.get('HTTP_CONTENT_LENGTH',
    +                                              META.get('CONTENT_LENGTH', 0)))
    +            except (ValueError, TypeError):
    +                content_length = 0
    +
    +            if content_length and content_length > 1048576:
    +                # If the content is large enough, use temporary files.
    +                self.upload_handlers = [TemporaryFileUploadHandler()]
                 else:
    -                POST.appendlist(name_dict['name'], submessage.get_payload())
    -    return POST, FILES
    +                self.upload_handlers = [TemporaryFileUploadHandler()]
    +                #self.upload_handlers = [MemoryFileUploadHandler()]
     
    +        parser = MultiPartParser(META, post_data, self.upload_handlers,
    +                                 self.encoding)
    +        return parser.parse()
     
    +
     class QueryDict(MultiValueDict):
         """
         A specialized MultiValueDict that takes a query string when initialized.
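
With these methods in place, a view can install its own handler before the POST data is first touched; a minimal sketch (the view itself is hypothetical):

    from django.http import HttpResponse
    from django.http.fileuploadhandler import TemporaryFileUploadHandler

    def upload_view(request):
        # Must run before request.POST/request.FILES are first accessed;
        # set_upload_handler() raises AttributeError once _files exists.
        request.set_upload_handler(TemporaryFileUploadHandler())
        return HttpResponse('received %d file field(s)' % len(request.FILES))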
  • django/http/fileuploadhandler.py

     
     1""" A fileuploadhandler base and default subclass for handling file uploads.
     2"""
     3import os
     4try:
     5    from cStringIO import StringIO
     6except ImportError:
     7    from StringIO import StringIO
     8
     9
     10from django.http.uploadedfile import TemporaryUploadedFile, InMemoryUploadedFile
     11
     12__all__ = ('UploadFileException','StopUpload', 'SkipFile',
     13           'FileUploadHandler', 'TemporaryFileUploadHandler',
     14           'MemoryFileUploadHandler')
     15
     16
     17class UploadFileException(Exception):
     18    """ Any error having to do with Uploading Files. """
     19    pass
     20
     21class StopUpload(UploadFileException):
     22    """ This exception is raised when an upload must abort. """
     23    pass
     24
     25class SkipFile(UploadFileException):
     26    """ This exception is raised when a file needs to be skipped. """
     27    pass
     28
     29
     30class FileUploadHandler(object):
     31    """ FileUploadHandler will take data and handle file uploads
     32    in a streamed fashion.
     33    """
     34    chunk_size = 64 * 2 ** 10 #: The default chunk size is 64 KB.
     35
     36    def __init__(self):
     37        " Initialize some local variables. "
     38        self.file_name = None
     39        self.content_type = None
     40        self.content_length = None
     41        self.charset = None
     42
     43    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
     44        """
     45        Signal that a new file has been started.
     46       
     47        Warning: Do not trust content_length, if you get it at all.
     48        """
     49        self.field_name = field_name
     50        self.file_name = file_name
     51        self.content_type = content_type
     52        self.content_length = content_length
     53        self.charset = charset
     54
     55    def receive_data_chunk(self, raw_data, start, stop):
     56        """
     57        Receive data from the streamed upload parser.
     58        Start and stop are the positions in the file.
     59        This equality should always be true::
     60            len(raw_data) = stop - start
     61        """
     62        raise NotImplementedError()
     63
     64    def file_complete(self, file_size):
     65        """
     66        Signal that a file has completed.
     67        File size corresponds to the actual size accumulated
     68        by all the chunks.
     69
     70        This should return a valid UploadedFile object.
     71        """
     72        raise NotImplementedError()
     73
     74    def upload_complete(self):
     75        """
     76        Signal that the upload is complete.
     77        Do any cleanup that is necessary for this handler.
     78        """
     79        pass
     80
     81
     82
     83class TemporaryFileUploadHandler(FileUploadHandler):
     84    """
     85    Upload the streaming data into a temporary file.
     86    """
     87    def __init__(self, *args, **kwargs):
     88        """ Import settings for later. """
     89        super(TemporaryFileUploadHandler, self).__init__(*args, **kwargs)
     90        global settings
     91        from django.conf import settings
     92
     93    def new_file(self, file_name, *args, **kwargs):
     94        """
     95        Create the file object to append to as data is coming in.
     96        """
     97        super(TemporaryFileUploadHandler, self).new_file(file_name, *args, **kwargs)
     98        self.file = TemporaryFile(settings.FILE_UPLOAD_DIR)
     99        self.write = self.file.write
     100
     101    def receive_data_chunk(self, raw_data, start, stop):
     102        """
     103        Once we get the data, we will save it to our file.
     104        """
     105        self.write(raw_data)
     106
     107    def file_complete(self, file_size):
     108        """
     109        Signal that a file has completed.
     110        File size corresponds to the actual size accumulated
     111        by all the chunks.
     112
     113        This should return a valid UploadedFile object.
     114        """
     115        self.file.seek(0)
     116        return TemporaryUploadedFile(self.file, self.file_name,
     117                                     self.content_type, file_size,
     118                                     self.charset)
     119
     120
     121class TemporaryFile(object):
     122    """
     123    A temporary file that tries to delete itself when garbage collected.
     124    """
     125    def __init__(self, dir):
     126        import tempfile
     127        if not dir:
     128            dir = tempfile.gettempdir()
     129        (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
     130        self.file = os.fdopen(fd, 'w+b')
     131        self.name = name
     132
     133    def __getattr__(self, name):
     134        a = getattr(self.__dict__['file'], name)
     135        if type(a) != type(0):
     136            setattr(self, name, a)
     137        return a
     138
     139    def __del__(self):
     140        try:
     141            os.unlink(self.name)
     142        except OSError:
     143            pass
     144
     145
     146class MemoryFileUploadHandler(FileUploadHandler):
     147    """
     148    The MemoryFileUploadHandler will place the data directly into memory.
     149    """
     150    chunk_size = 32 * 2 ** 40 #: Make the chunk size huge
     151
     152    def __init__(self):
     153        " Initialize some local variables. "
     154        self.file_name = None
     155        self.content_type = None
     156        self.content_length = None
     157
     158    def new_file(self, field_name, file_name, content_type, content_length, charset):
     159        """
     160        Set the local data variable to empty.
     161        """
     162        self.field_name = field_name
     163        self.file_name = file_name
     164        self.content_type = content_type
     165        self.content_length = content_length
     166        self.charset = charset
     167        self.file = StringIO()
     168
     169    def receive_data_chunk(self, raw_data, start, stop):
     170        """
     171        Append the data to the local data variable.
     172        """
     173        self.file.write(raw_data)
     174
     175    def file_complete(self, file_size):
     176        """
     177        Return an in memory file object.
     178        """
     179        current_file = self.file
     180        self.file = None #: Decrement the ref count
     181        return InMemoryUploadedFile(current_file, self.field_name,
     182                                    self.file_name, self.content_type,
     183                                    self.charset)
     184
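
The FileUploadHandler API above is the extension point. As an illustration only (not part of the patch), a quota-enforcing handler could abort oversized uploads by raising StopUpload:

    from django.http.fileuploadhandler import FileUploadHandler, StopUpload

    class QuotaUploadHandler(FileUploadHandler):
        """Hypothetical handler: refuse more than `quota` bytes total."""
        def __init__(self, quota=10 * 2 ** 20):
            FileUploadHandler.__init__(self)
            self.quota = quota
            self.total = 0

        def receive_data_chunk(self, raw_data, start, stop):
            self.total += len(raw_data)
            if self.total > self.quota:
                # The parser catches this and stops feeding data.
                raise StopUpload()

        def file_complete(self, file_size):
            # Returning None means nothing is added to request.FILES.
            return None

    # Installed per-request, e.g.:
    #     request.set_upload_handler(QuotaUploadHandler())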
  • django/conf/global_settings.py

     
    @@ -224,6 +224,11 @@
     # Example: "http://media.lawrence.com"
     MEDIA_URL = ''
     
    +# Directory in which to temporarily store streamed file uploads.
    +# A value of `None` means Django will use the default temporary
    +# directory for the server's operating system.
    +FILE_UPLOAD_DIR = None
    +
     # Default formatting for date objects. See all available format strings here:
     # http://www.djangoproject.com/documentation/templates/#now
     DATE_FORMAT = 'N j, Y'
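
A project that wants streamed uploads kept on a particular volume would override this in its own settings module, for example (path illustrative):

    # settings.py: stream uploads to a dedicated scratch directory
    # instead of the operating system's default temporary directory.
    FILE_UPLOAD_DIR = '/var/tmp/django_uploads'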
  • django/core/handlers/wsgi.py

     
    @@ -78,6 +78,7 @@
             self.path = force_unicode(environ['PATH_INFO'])
             self.META = environ
             self.method = environ['REQUEST_METHOD'].upper()
    +        self.upload_handlers = []
     
         def __repr__(self):
             # Since this is called as part of error handling, we need to be very

    @@ -112,9 +113,8 @@
             # Populates self._post and self._files
             if self.method == 'POST':
                 if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
    -                header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
    -                header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
    -                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
    +                self._raw_post_data = ''
    +                self._post, self._files = self.parse_file_upload(self.META, self.environ['wsgi.input'])
                 else:
                     self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
             else:
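
Passing environ['wsgi.input'] straight through works because the parser (via LimitBytes) only ever calls read(num_bytes) on it. A minimal stand-in for tests, assuming just that interface (class name illustrative):

    from StringIO import StringIO

    class FakeWSGIInput(object):
        """Hypothetical test double for environ['wsgi.input']."""
        def __init__(self, data):
            self._buf = StringIO(data)

        def read(self, num_bytes=-1):
            if num_bytes is None:
                num_bytes = -1
            return self._buf.read(num_bytes)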
  • django/core/handlers/modpython.py

     
    @@ -16,6 +16,7 @@
         def __init__(self, req):
             self._req = req
             self.path = force_unicode(req.uri)
    +        self.upload_handlers = []
     
         def __repr__(self):
             # Since this is called as part of error handling, we need to be very

    @@ -53,7 +54,8 @@
         def _load_post_and_files(self):
             "Populates self._post and self._files"
             if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
    -            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
    +            self._raw_post_data = ''
    +            self._post, self._files = self.parse_file_upload(self.META, self._req)
             else:
                 self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
     
  • django/core/files/filemove.py

     
    import os

    __all__ = ('file_move_safe',)

    try:
        import shutil
        file_move = shutil.move
    except ImportError:
        file_move = os.rename

    def file_move_safe(old_file_name, new_file_name, chunk_size=1024*64, allow_overwrite=False):
        """
        Moves a file from one location to another in the safest way possible.

        First, it tries ``shutil.move``, which is OS-dependent but doesn't
        break when moving across filesystems (if ``shutil`` is unavailable,
        ``os.rename`` is used instead, and that *does* break across
        filesystems). Should that move fail, the file is streamed manually
        from the old location to the new one in Python.

        Without ``allow_overwrite``, if the destination file exists, an
        IOError is raised.
        """
        from django.core.files import filelocks

        if old_file_name == new_file_name:
            # No file moving takes place.
            return

        if not allow_overwrite and os.path.exists(new_file_name):
            raise IOError("Django does not allow overwriting files.")

        try:
            file_move(old_file_name, new_file_name)
            return
        except OSError:
            # Moving to another filesystem; fall through to a manual copy.
            pass

        new_file = open(new_file_name, 'wb')
        # Grab an exclusive lock on the destination while we stream into it.
        filelocks.lock(new_file, filelocks.LOCK_EX)
        old_file = open(old_file_name, 'rb')

        current_chunk = None
        while current_chunk != '':
            current_chunk = old_file.read(chunk_size)
            new_file.write(current_chunk)

        new_file.close()
        old_file.close()

        os.remove(old_file_name)
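
For instance (function and paths illustrative), a finished temporary upload could be promoted to its final location like so:

    from django.core.files.filemove import file_move_safe

    def promote_upload(uploaded, final_path):
        # `uploaded` is assumed to be a TemporaryUploadedFile from this
        # patch; pass allow_overwrite=True to clobber an existing target.
        file_move_safe(uploaded.temporary_file_path(), final_path)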