| | 1 | """ |
|---|
| | 2 | MultiPart parsing for file uploads. |
|---|
| | 3 | |
|---|
| | 4 | This object will take the file upload headers |
|---|
| | 5 | and the file upload handler and chunk the upload |
|---|
| | 6 | data for the handler to deal with. |
|---|
| | 7 | """ |
|---|
| | 8 | from django.utils.datastructures import MultiValueDict |
|---|
| | 9 | from django.utils.encoding import force_unicode |
|---|
| | 10 | |
|---|
| | 11 | __all__ = ('MultiPartParser','MultiPartParserError','InputStreamExhausted') |
|---|
| | 12 | |
|---|
class MultiPartParserError(Exception):
    """Raised when a multipart request cannot be parsed."""
|---|
| | 15 | |
|---|
class InputStreamExhausted(Exception):
    """Signals that no further reads are allowed from an input stream."""
|---|
| | 19 | |
|---|
class MultiPartParser(object):
    """
    An RFC 2388 multipart/form-data parser.

    parse() reads the input stream in chunk_size chunks and returns a
    tuple of (POST MultiValueDict, FILES MultiValueDict).  Ordinary form
    fields are decoded and stored in POST; file parts are streamed chunk
    by chunk to the upload handler, and whatever object the handler
    returns for a completed file is stored in FILES.
    """
    def __init__(self, META, input_data, upload_handlers, encoding=None):
        """
        Initialize the MultiPartParser object.

        *META* -- The standard META dictionary in Django request objects.
        *input_data* -- A file-like object providing read(); the raw
                        request body is pulled from it.
        *upload_handlers* -- A sequence of upload handler objects.  Only
                             the first one is used for now.
        *encoding* -- The encoding with which to treat the incoming data
                      (defaults to settings.DEFAULT_CHARSET).

        Raises MultiPartParserError when the Content-Type is not
        multipart, the boundary is missing/invalid, or the Content-Length
        is missing, non-numeric or not positive.
        """
        # Imported lazily and published as module globals, exactly as the
        # original code did.
        global parse_header, valid_boundary, settings
        from django.conf import settings
        from cgi import valid_boundary, parse_header

        # Content-Type must announce multipart and carry the boundary.
        content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', ''))
        if not content_type.startswith('multipart/'):
            raise MultiPartParserError('Invalid Content-Type: %s' %
                                       content_type)

        # Parse the header to get the boundary used to split the parts.
        _main_type, opts = parse_header(content_type)
        boundary = opts.get('boundary')
        if not boundary or not valid_boundary(boundary):
            raise MultiPartParserError('Invalid boundary in multipart: %s' %
                                       boundary)

        # Content-Length tells us how much body we are allowed to read.
        try:
            content_length = int(META.get('HTTP_CONTENT_LENGTH',
                                          META.get('CONTENT_LENGTH', 0)))
        except (ValueError, TypeError):
            content_length = 0

        if content_length <= 0:
            # Nothing (or nonsense) to read: refuse to continue.
            raise MultiPartParserError("Invalid content length: %r" % content_length)

        # For now, just use the first upload handler:
        upload_handler = upload_handlers[0]

        self._boundary = boundary
        self._input_data = input_data

        # For compatibility with low-level network APIs,
        # the chunk size should be <= 2^31 - 1:
        self._chunk_size = min(upload_handler.chunk_size, 2147483647)

        self._post = MultiValueDict()
        self._files = MultiValueDict()
        self._encoding = encoding or settings.DEFAULT_CHARSET
        self._content_length = content_length
        self._upload_handler = upload_handler

    def parse(self):
        """
        Parse the POST data and break it into a FILES MultiValueDict
        and a POST MultiValueDict.

        *returns* -- A tuple containing the POST and FILES dictionary,
                     respectively.

        Raises MultiPartParserError if a base64-encoded file chunk cannot
        be decoded.
        """
        from base64 import b64decode
        from django.http.fileuploadhandler import StopUpload, SkipFile

        encoding = self._encoding
        handler = self._upload_handler

        limited_input_data = LimitBytes(self._input_data, self._content_length)

        # Instantiate the parser and stream:
        stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size))
        for item_type, meta_data, field_stream in Parser(stream, self._boundary):
            try:
                disposition = meta_data['content-disposition'][1]
                field_name = disposition['name'].strip()
            except (KeyError, IndexError, AttributeError):
                # Not a usable form part; drain it so its remaining bytes
                # are not mistaken for the start of another part.
                field_stream.exhaust()
                continue

            # Parsed headers are stored as (value, params) tuples, so the
            # bare encoding name has to be pulled out before comparing.
            transfer_encoding = meta_data.get('content-transfer-encoding')
            if transfer_encoding is not None:
                transfer_encoding = transfer_encoding[0].strip().lower()

            if item_type == 'FIELD':
                # This is a post field, we can just set it in the post
                data = field_stream.read()
                if transfer_encoding == 'base64':
                    try:
                        data = b64decode(data)
                    except (TypeError, ValueError):
                        # Undecodable: fall back to the raw value.
                        pass

                self._post.appendlist(force_unicode(field_name, encoding, errors='replace'),
                                      force_unicode(data, encoding, errors='replace'))
            elif item_type == 'FILE':
                # This is a file, use the handler...
                file_name = self.IE_sanitize(disposition.get('filename'))
                if not file_name:
                    field_stream.exhaust()
                    continue

                file_name = force_unicode(file_name, encoding, errors='replace')

                content_type = meta_data.get('content-type', ('',))[0].strip()
                try:
                    charset = meta_data.get('content-type', (0, {}))[1].get('charset', None)
                except (IndexError, AttributeError, TypeError):
                    charset = None

                try:
                    content_length = int(meta_data.get('content-length')[0])
                except (IndexError, TypeError, ValueError):
                    content_length = None

                counter = 0
                try:
                    handler.new_file(field_name, file_name,
                                     content_type, content_length,
                                     charset)
                    for chunk in field_stream:
                        if transfer_encoding == 'base64':
                            # We only special-case base64 transfer encoding.
                            # base64 decodes in groups of four characters,
                            # so strip whitespace and top the chunk up from
                            # the stream until it is aligned (or the part
                            # runs out).
                            chunk = ''.join(chunk.split())
                            while len(chunk) % 4:
                                extra = field_stream.read(4 - len(chunk) % 4)
                                if not extra:
                                    break
                                chunk += ''.join(extra.split())
                            try:
                                chunk = b64decode(chunk)
                            except (TypeError, ValueError) as e:
                                raise MultiPartParserError("Could not decode base64 data: %r" % e)

                        chunk_length = len(chunk)
                        counter += chunk_length
                        handler.receive_data_chunk(chunk,
                                                   counter - chunk_length,
                                                   counter)
                except SkipFile:
                    # Just use up the rest of this file...
                    field_stream.exhaust()
                except StopUpload:
                    # Abort the parsing; the remaining request body is
                    # drained once the loop has been left.
                    break
                else:
                    # Only do this if the handler didn't raise an abort error
                    file_obj = handler.file_complete(counter)
                    if file_obj:
                        # If it returns a file object, then set the files dict.
                        self._files.appendlist(force_unicode(field_name,
                                                             encoding,
                                                             errors='replace'),
                                               file_obj)
            else:
                # Neither a field nor a file: skip over the part.
                field_stream.exhaust()

        # Make sure that the request data is all fed
        limited_input_data.exhaust()

        # Signal that the upload has completed.
        handler.upload_complete()

        return self._post, self._files

    def IE_sanitize(self, filename):
        """
        Cleanup filename from IE full paths.

        Keeps only what follows the last backslash and strips surrounding
        whitespace.  Falsy input (None, '') is returned unchanged.
        """
        return filename and filename[filename.rfind("\\")+1:].strip()
|---|
| | 219 | |
|---|
| | 220 | |
|---|
class LazyStream(object):
    """
    A file-like, push-back capable wrapper around an iterator of strings.

    Data is pulled from the producer only on demand.  Bytes handed back
    through unget() are returned again by the next read()/next() call,
    and tell() reports how many bytes the consumer currently holds.
    """
    def __init__(self, producer, length=None):
        """
        Every LazyStream must have a producer when instantiated.

        A producer is an iterator that returns a string each time it is
        advanced.  *length*, when given, is stored and used as the byte
        limit for read() calls that do not pass an explicit size.
        """
        self._producer = producer
        self._empty = False
        self._leftover = ''
        self.length = length
        self.position = 0
        self._remaining = length

    def tell(self):
        """Return the current position in the stream."""
        return self.position

    def read(self, size=None):
        """
        Return up to *size* bytes, or everything left when no limit applies.

        Fewer bytes than requested are returned if the producer runs dry.
        Any surplus pulled from the producer is pushed back for later.
        """
        remaining = self._remaining if size is None else size

        # Do the whole thing in one shot if no limit was provided.
        if remaining is None:
            return ''.join(self)

        # Otherwise do some bookkeeping to return exactly enough of the
        # stream, stashing any extra content we get from the producer.
        # Position is maintained by next()/unget(); adding len(out) here
        # as well would count every byte twice.
        pieces = []
        while remaining != 0:
            assert remaining > 0, 'remaining bytes to read should never go negative'
            try:
                chunk = self.next()
            except StopIteration:
                # Producer exhausted: hand back whatever was collected.
                break
            emitting = chunk[:remaining]
            self.unget(chunk[remaining:])
            remaining -= len(emitting)
            pieces.append(emitting)
        return ''.join(pieces)

    def next(self):
        """
        Used when the exact number of bytes to read is unimportant.

        Returns pushed-back data if there is any, otherwise whatever
        chunk the producer conveniently yields.  Useful to avoid
        unnecessary bookkeeping if performance is an issue.  Raises
        StopIteration once the producer is exhausted.
        """
        if self._leftover:
            output = self._leftover
            self._leftover = ''
        else:
            output = next(self._producer)
        self.position += len(output)
        return output

    # Iterator protocol name used by Python 3.
    __next__ = next

    def close(self):
        """
        Used to invalidate/disable this lazy stream.

        Replaces the producer with an empty iterator.  Any leftover bytes
        that have already been read will still be reported upon read()
        and/or next().
        """
        # A bare list would not do: it is iterable but not an iterator,
        # so advancing it would fail instead of signalling exhaustion.
        self._producer = iter(())

    def __iter__(self):
        return self

    def unget(self, bytes):
        """
        Places bytes back onto the front of the lazy stream.

        Future calls to read() will return those bytes first. The
        stream position and thus tell() will be rewound.
        """
        self.position -= len(bytes)
        self._leftover = ''.join([bytes, self._leftover])

    def exhaust(self):
        """
        Exhausts the entire underlying stream.

        Useful for skipping and advancing sections.
        """
        for _chunk in self:
            pass
|---|
| | 313 | |
|---|
| | 314 | |
|---|
| | 315 | class ChunkIter(object): |
|---|
| | 316 | def __init__(self, flo, chunk_size=1024**2): |
|---|
| | 317 | self.flo = flo |
|---|
| | 318 | self.chunk_size = chunk_size |
|---|
| | 319 | |
|---|
| | 320 | def next(self): |
|---|
| | 321 | try: |
|---|
| | 322 | data = self.flo.read(self.chunk_size) |
|---|
| | 323 | except InputStreamExhausted: |
|---|
| | 324 | raise StopIteration |
|---|
| | 325 | if data: |
|---|
| | 326 | return data |
|---|
| | 327 | else: |
|---|
| | 328 | raise StopIteration |
|---|
| | 329 | |
|---|
| | 330 | def __iter__(self): |
|---|
| | 331 | return self |
|---|
| | 332 | |
|---|
| | 333 | |
|---|
class LimitBytes(object):
    """
    Limit bytes for a file object.

    Wraps *fileobject* so that no more than *length* bytes are ever
    requested from it; `remaining` holds the budget that is left.
    """
    def __init__(self, fileobject, length):
        self._file = fileobject
        self.remaining = length

    def read(self, num_bytes=None):
        """
        Read data from the underlying file.

        At most `remaining` bytes are requested; when *num_bytes* is None
        everything that is left within the limit is requested.  The budget
        is reduced by the amount requested.  Raises InputStreamExhausted
        when the limit has already been used up.
        """
        if self.remaining <= 0:
            raise InputStreamExhausted()
        if num_bytes is None:
            # The default argument used to reach min() and break the
            # arithmetic below; treat it as "all that is left".
            num_bytes = self.remaining
        else:
            num_bytes = min(num_bytes, self.remaining)
        self.remaining -= num_bytes
        return self._file.read(num_bytes)

    def exhaust(self):
        """
        Exhaust this file until all of the bytes it was limited by
        have been read.  The data is read in blocks of at most 16384
        bytes and discarded.
        """
        while self.remaining > 0:
            num_bytes = min(self.remaining, 16384)
            self._file.read(num_bytes)
            self.remaining -= num_bytes
|---|
| | 361 | |
|---|
| | 362 | |
|---|
| | 363 | class InterBoundaryIter(object): |
|---|
| | 364 | """ |
|---|
| | 365 | A Producer that will iterate over boundaries. |
|---|
| | 366 | """ |
|---|
| | 367 | def __init__(self, stream, boundary): |
|---|
| | 368 | self._stream = stream |
|---|
| | 369 | self._boundary = boundary |
|---|
| | 370 | |
|---|
| | 371 | def __iter__(self): |
|---|
| | 372 | return self |
|---|
| | 373 | |
|---|
| | 374 | def next(self): |
|---|
| | 375 | try: |
|---|
| | 376 | return LazyStream(BoundaryIter(self._stream, self._boundary)) |
|---|
| | 377 | except InputStreamExhausted: |
|---|
| | 378 | raise StopIteration |
|---|
| | 379 | |
|---|
| | 380 | class BoundaryIter(object): |
|---|
| | 381 | """ |
|---|
| | 382 | A Producer that is sensitive to boundaries. |
|---|
| | 383 | |
|---|
| | 384 | Will happily yield bytes until a boundary is found. Will yield the |
|---|
| | 385 | bytes before the boundary, throw away the boundary bytes |
|---|
| | 386 | themselves, and push the post-boundary bytes back on the stream. |
|---|
| | 387 | |
|---|
| | 388 | The future calls to .next() after locating the boundary will raise |
|---|
| | 389 | a StopIteration exception. |
|---|
| | 390 | """ |
|---|
| | 391 | def __init__(self, stream, boundary): |
|---|
| | 392 | self._stream = stream |
|---|
| | 393 | self._boundary = boundary |
|---|
| | 394 | self._done = False |
|---|
| | 395 | # rollback an additional six bytes because the format is like |
|---|
| | 396 | # this: CRLF<boundary>[--CRLF] |
|---|
| | 397 | self._rollback = len(boundary) + 6 |
|---|
| | 398 | |
|---|
| | 399 | # Try to use mx fast string search if available. Otherwise |
|---|
| | 400 | # use Python find. Wrap the latter for consistency. |
|---|
| | 401 | unused_char = self._stream.read(1) |
|---|
| | 402 | if not unused_char: |
|---|
| | 403 | raise InputStreamExhausted |
|---|
| | 404 | self._stream.unget(unused_char) |
|---|
| | 405 | try: |
|---|
| | 406 | from mx.TextTools import FS |
|---|
| | 407 | self._fs = FS(boundary).find |
|---|
| | 408 | except ImportError: |
|---|
| | 409 | self._fs = lambda data: data.find(boundary) |
|---|
| | 410 | |
|---|
| | 411 | def __iter__(self): |
|---|
| | 412 | return self |
|---|
| | 413 | |
|---|
| | 414 | def next(self): |
|---|
| | 415 | if self._done: |
|---|
| | 416 | raise StopIteration |
|---|
| | 417 | |
|---|
| | 418 | stream = self._stream |
|---|
| | 419 | rollback = self._rollback |
|---|
| | 420 | |
|---|
| | 421 | bytes_read = 0 |
|---|
| | 422 | chunks = [] |
|---|
| | 423 | for bytes in stream: |
|---|
| | 424 | bytes_read += len(bytes) |
|---|
| | 425 | chunks.append(bytes) |
|---|
| | 426 | if bytes_read > rollback: |
|---|
| | 427 | break |
|---|
| | 428 | if not bytes: |
|---|
| | 429 | break |
|---|
| | 430 | else: |
|---|
| | 431 | self._done = True |
|---|
| | 432 | |
|---|
| | 433 | if not chunks: |
|---|
| | 434 | raise StopIteration |
|---|
| | 435 | |
|---|
| | 436 | chunk = ''.join(chunks) |
|---|
| | 437 | |
|---|
| | 438 | boundary = self._find_boundary(chunk, len(chunk) < self._rollback) |
|---|
| | 439 | |
|---|
| | 440 | |
|---|
| | 441 | if boundary: |
|---|
| | 442 | end, next = boundary |
|---|
| | 443 | stream.unget(chunk[next:]) |
|---|
| | 444 | self._done = True |
|---|
| | 445 | return chunk[:end] |
|---|
| | 446 | else: |
|---|
| | 447 | # make sure we dont treat a partial boundary (and |
|---|
| | 448 | # its separators) as data |
|---|
| | 449 | if not chunk[:-rollback]:# and len(chunk) >= (len(self._boundary) + 6): |
|---|
| | 450 | # There's nothing left, we should just return and mark as done. |
|---|
| | 451 | self._done = True |
|---|
| | 452 | return chunk |
|---|
| | 453 | else: |
|---|
| | 454 | stream.unget(chunk[-rollback:]) |
|---|
| | 455 | return chunk[:-rollback] |
|---|
| | 456 | |
|---|
| | 457 | def _find_boundary(self, data, eof = False): |
|---|
| | 458 | """ |
|---|
| | 459 | Finds a multipart boundary in data. |
|---|
| | 460 | |
|---|
| | 461 | Should no boundry exist in the data None is returned |
|---|
| | 462 | instead. Otherwise a tuple containing |
|---|
| | 463 | the indices of the following are returned: |
|---|
| | 464 | |
|---|
| | 465 | * the end of current encapsulation |
|---|
| | 466 | |
|---|
| | 467 | * the start of the next encapsulation |
|---|
| | 468 | """ |
|---|
| | 469 | index = self._fs(data) |
|---|
| | 470 | if index < 0: |
|---|
| | 471 | return None |
|---|
| | 472 | else: |
|---|
| | 473 | end = index |
|---|
| | 474 | next = index + len(self._boundary) |
|---|
| | 475 | data_len = len(data) - 1 |
|---|
| | 476 | # backup over CRLF |
|---|
| | 477 | if data[max(0,end-1)] == '\n': end -= 1 |
|---|
| | 478 | if data[max(0,end-1)] == '\r': end -= 1 |
|---|
| | 479 | # skip over --CRLF |
|---|
| | 480 | if data[min(data_len,next)] == '-': next += 1 |
|---|
| | 481 | if data[min(data_len,next)] == '-': next += 1 |
|---|
| | 482 | if data[min(data_len,next)] == '\r': next += 1 |
|---|
| | 483 | if data[min(data_len,next)] == '\n': next += 1 |
|---|
| | 484 | return end, next |
|---|
| | 485 | |
|---|
def ParseBoundaryStream(stream, max_header_size):
    """
    Parses one and exactly one stream that encapsulates a boundary.

    Reads up to *max_header_size* bytes from *stream* and looks for the
    blank line that ends the part headers; the headers must fit within
    that one read.

    Returns a tuple (item_type, headers, stream):

     * item_type is 'RAW' when no header block was found (the stream is
       handed back untouched), 'FILE' when the Content-Disposition header
       carries a non-empty filename parameter, and 'FIELD' otherwise.

     * headers maps each lower-cased header name to a
       (value, parameter dict) tuple.

     * stream is the same stream object, positioned at the part payload.
    """
    def split_header(line):
        # The terminology ("main value" and "dictionary of parameters")
        # is from the Python docs for cgi.parse_header.
        from cgi import parse_header
        main_value_pair, params = parse_header(line)
        try:
            name, value = main_value_pair.split(':', 1)
        except ValueError:
            raise ValueError("Invalid header: %r" % line)
        # Header names are case-insensitive; normalise them so lookups
        # such as 'content-disposition' match however the client wrote it.
        return name.strip().lower(), (value, params)

    chunk = stream.read(max_header_size)

    # 'find' returns the top of these four bytes, so we'll need to munch
    # them later to prevent them from polluting the payload.
    header_end = chunk.find('\r\n\r\n')

    if header_end == -1:
        # We find no header, so we just mark this fact and pass on the
        # stream verbatim.
        stream.unget(chunk)
        return ('RAW', {}, stream)

    header = chunk[:header_end]

    # Place any excess chunk back onto the stream, throwing away the
    # CRLFCRLF bytes from above.
    stream.unget(chunk[header_end + 4:])

    is_file_field = False
    outdict = {}

    for line in header.split('\r\n'):
        try:
            name, (value, params) = split_header(line)
        except ValueError:
            # Blank or malformed line: skip it rather than failing the
            # whole request.
            continue

        if name == 'content-disposition' and params.get('filename'):
            is_file_field = True

        outdict[name] = value, params

    if is_file_field:
        return ('FILE', outdict, stream)
    return ('FIELD', outdict, stream)
|---|
| | 537 | |
|---|
| | 538 | |
|---|
class Parser(object):
    """
    Iterate over the parts of a multipart stream.

    Each iteration yields the result of ParseBoundaryStream for one
    boundary-delimited section, with part headers limited to 1024 bytes.
    """
    def __init__(self, stream, boundary):
        self._stream = stream
        # On the wire every boundary line is prefixed with two dashes.
        self._separator = '--' + boundary

    def __iter__(self):
        sections = InterBoundaryIter(self._stream, self._separator)
        for section in sections:
            yield ParseBoundaryStream(section, 1024)
|---|
| | 552 | |
|---|
| | 553 | |
|---|
| | 554 | |