| 1 |
""" |
|---|
| 2 |
Multi-part parsing for file uploads. |
|---|
| 3 |
|
|---|
| 4 |
Exposes one class, ``MultiPartParser``, which feeds chunks of uploaded data to |
|---|
| 5 |
file upload handlers for processing. |
|---|
| 6 |
""" |
|---|
| 7 |
|
|---|
| 8 |
import cgi |
|---|
| 9 |
from django.conf import settings |
|---|
| 10 |
from django.core.exceptions import SuspiciousOperation |
|---|
| 11 |
from django.utils.datastructures import MultiValueDict |
|---|
| 12 |
from django.utils.encoding import force_unicode |
|---|
| 13 |
from django.utils.text import unescape_entities |
|---|
| 14 |
from django.core.files.uploadhandler import StopUpload, SkipFile, StopFutureHandlers |
|---|
| 15 |
|
|---|
| 16 |
# Names exported by a star-import of this module.
__all__ = (
    'MultiPartParser',
    'MultiPartParserError',
    'InputStreamExhausted',
)
|---|
| 17 |
|
|---|
| 18 |
class MultiPartParserError(Exception):
    """
    Raised when a multipart request cannot be parsed.
    """
|---|
| 20 |
|
|---|
| 21 |
class InputStreamExhausted(Exception):
    """
    Signals that no further reads are allowed from the input device.
    """
|---|
| 26 |
|
|---|
| 27 |
# Item-type tags attached to each part of a multipart body; the parser
# compares against these to decide how a part is handled.
RAW, FILE, FIELD = "raw", "file", "field"
|---|
| 30 |
|
|---|
| 31 |
class MultiPartParser(object): |
|---|
| 32 |
""" |
|---|
| 33 |
A rfc2388 multipart/form-data parser. |
|---|
| 34 |
|
|---|
| 35 |
``MultiValueDict.parse()`` reads the input stream in ``chunk_size`` chunks |
|---|
| 36 |
and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``. If |
|---|
| 37 |
``file_upload_dir`` is defined files will be streamed to temporary files in |
|---|
| 38 |
that directory. |
|---|
| 39 |
""" |
|---|
| 40 |
def __init__(self, META, input_data, upload_handlers, encoding=None): |
|---|
| 41 |
""" |
|---|
| 42 |
Initialize the MultiPartParser object. |
|---|
| 43 |
|
|---|
| 44 |
:META: |
|---|
| 45 |
The standard ``META`` dictionary in Django request objects. |
|---|
| 46 |
:input_data: |
|---|
| 47 |
The raw post data, as a bytestring. |
|---|
| 48 |
:upload_handler: |
|---|
| 49 |
An UploadHandler instance that performs operations on the uploaded |
|---|
| 50 |
data. |
|---|
| 51 |
:encoding: |
|---|
| 52 |
The encoding with which to treat the incoming data. |
|---|
| 53 |
""" |
|---|
| 54 |
|
|---|
| 55 |
# |
|---|
| 56 |
# Content-Type should containt multipart and the boundary information. |
|---|
| 57 |
# |
|---|
| 58 |
|
|---|
| 59 |
content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', '')) |
|---|
| 60 |
if not content_type.startswith('multipart/'): |
|---|
| 61 |
raise MultiPartParserError('Invalid Content-Type: %s' % content_type) |
|---|
| 62 |
|
|---|
| 63 |
# Parse the header to get the boundary to split the parts. |
|---|
| 64 |
ctypes, opts = parse_header(content_type) |
|---|
| 65 |
boundary = opts.get('boundary') |
|---|
| 66 |
if not boundary or not cgi.valid_boundary(boundary): |
|---|
| 67 |
raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary) |
|---|
| 68 |
|
|---|
| 69 |
|
|---|
| 70 |
# |
|---|
| 71 |
# Content-Length should contain the length of the body we are about |
|---|
| 72 |
# to receive. |
|---|
| 73 |
# |
|---|
| 74 |
try: |
|---|
| 75 |
content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH',0))) |
|---|
| 76 |
except (ValueError, TypeError): |
|---|
| 77 |
# For now set it to 0; we'll try again later on down. |
|---|
| 78 |
content_length = 0 |
|---|
| 79 |
|
|---|
| 80 |
if content_length <= 0: |
|---|
| 81 |
# This means we shouldn't continue...raise an error. |
|---|
| 82 |
raise MultiPartParserError("Invalid content length: %r" % content_length) |
|---|
| 83 |
|
|---|
| 84 |
self._boundary = boundary |
|---|
| 85 |
self._input_data = input_data |
|---|
| 86 |
|
|---|
| 87 |
# For compatibility with low-level network APIs (with 32-bit integers), |
|---|
| 88 |
# the chunk size should be < 2^31, but still divisible by 4. |
|---|
| 89 |
self._chunk_size = min(2**31-4, *[x.chunk_size for x in upload_handlers if x.chunk_size]) |
|---|
| 90 |
|
|---|
| 91 |
self._meta = META |
|---|
| 92 |
self._encoding = encoding or settings.DEFAULT_CHARSET |
|---|
| 93 |
self._content_length = content_length |
|---|
| 94 |
self._upload_handlers = upload_handlers |
|---|
| 95 |
|
|---|
| 96 |
def parse(self): |
|---|
| 97 |
""" |
|---|
| 98 |
Parse the POST data and break it into a FILES MultiValueDict and a POST |
|---|
| 99 |
MultiValueDict. |
|---|
| 100 |
|
|---|
| 101 |
Returns a tuple containing the POST and FILES dictionary, respectively. |
|---|
| 102 |
""" |
|---|
| 103 |
# We have to import QueryDict down here to avoid a circular import. |
|---|
| 104 |
from django.http import QueryDict |
|---|
| 105 |
|
|---|
| 106 |
encoding = self._encoding |
|---|
| 107 |
handlers = self._upload_handlers |
|---|
| 108 |
|
|---|
| 109 |
limited_input_data = LimitBytes(self._input_data, self._content_length) |
|---|
| 110 |
|
|---|
| 111 |
# See if the handler will want to take care of the parsing. |
|---|
| 112 |
# This allows overriding everything if somebody wants it. |
|---|
| 113 |
for handler in handlers: |
|---|
| 114 |
result = handler.handle_raw_input(limited_input_data, |
|---|
| 115 |
self._meta, |
|---|
| 116 |
self._content_length, |
|---|
| 117 |
self._boundary, |
|---|
| 118 |
encoding) |
|---|
| 119 |
if result is not None: |
|---|
| 120 |
return result[0], result[1] |
|---|
| 121 |
|
|---|
| 122 |
# Create the data structures to be used later. |
|---|
| 123 |
self._post = QueryDict('', mutable=True) |
|---|
| 124 |
self._files = MultiValueDict() |
|---|
| 125 |
|
|---|
| 126 |
# Instantiate the parser and stream: |
|---|
| 127 |
stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size)) |
|---|
| 128 |
|
|---|
| 129 |
# Whether or not to signal a file-completion at the beginning of the loop. |
|---|
| 130 |
old_field_name = None |
|---|
| 131 |
counters = [0] * len(handlers) |
|---|
| 132 |
|
|---|
| 133 |
try: |
|---|
| 134 |
for item_type, meta_data, field_stream in Parser(stream, self._boundary): |
|---|
| 135 |
if old_field_name: |
|---|
| 136 |
# We run this at the beginning of the next loop |
|---|
| 137 |
# since we cannot be sure a file is complete until |
|---|
| 138 |
# we hit the next boundary/part of the multipart content. |
|---|
| 139 |
self.handle_file_complete(old_field_name, counters) |
|---|
| 140 |
old_field_name = None |
|---|
| 141 |
|
|---|
| 142 |
try: |
|---|
| 143 |
disposition = meta_data['content-disposition'][1] |
|---|
| 144 |
field_name = disposition['name'].strip() |
|---|
| 145 |
except (KeyError, IndexError, AttributeError): |
|---|
| 146 |
continue |
|---|
| 147 |
|
|---|
| 148 |
transfer_encoding = meta_data.get('content-transfer-encoding') |
|---|
| 149 |
field_name = force_unicode(field_name, encoding, errors='replace') |
|---|
| 150 |
|
|---|
| 151 |
if item_type == FIELD: |
|---|
| 152 |
# This is a post field, we can just set it in the post |
|---|
| 153 |
if transfer_encoding == 'base64': |
|---|
| 154 |
raw_data = field_stream.read() |
|---|
| 155 |
try: |
|---|
| 156 |
data = str(raw_data).decode('base64') |
|---|
| 157 |
except: |
|---|
| 158 |
data = raw_data |
|---|
| 159 |
else: |
|---|
| 160 |
data = field_stream.read() |
|---|
| 161 |
|
|---|
| 162 |
self._post.appendlist(field_name, |
|---|
| 163 |
force_unicode(data, encoding, errors='replace')) |
|---|
| 164 |
elif item_type == FILE: |
|---|
| 165 |
# This is a file, use the handler... |
|---|
| 166 |
file_name = disposition.get('filename') |
|---|
| 167 |
if not file_name: |
|---|
| 168 |
continue |
|---|
| 169 |
file_name = force_unicode(file_name, encoding, errors='replace') |
|---|
| 170 |
file_name = self.IE_sanitize(unescape_entities(file_name)) |
|---|
| 171 |
|
|---|
| 172 |
content_type = meta_data.get('content-type', ('',))[0].strip() |
|---|
| 173 |
try: |
|---|
| 174 |
charset = meta_data.get('content-type', (0,{}))[1].get('charset', None) |
|---|
| 175 |
except: |
|---|
| 176 |
charset = None |
|---|
| 177 |
|
|---|
| 178 |
try: |
|---|
| 179 |
content_length = int(meta_data.get('content-length')[0]) |
|---|
| 180 |
except (IndexError, TypeError, ValueError): |
|---|
| 181 |
content_length = None |
|---|
| 182 |
|
|---|
| 183 |
counters = [0] * len(handlers) |
|---|
| 184 |
try: |
|---|
| 185 |
for handler in handlers: |
|---|
| 186 |
try: |
|---|
| 187 |
handler.new_file(field_name, file_name, |
|---|
| 188 |
content_type, content_length, |
|---|
| 189 |
charset) |
|---|
| 190 |
except StopFutureHandlers: |
|---|
| 191 |
break |
|---|
| 192 |
|
|---|
| 193 |
for chunk in field_stream: |
|---|
| 194 |
if transfer_encoding == 'base64': |
|---|
| 195 |
# We only special-case base64 transfer encoding |
|---|
| 196 |
try: |
|---|
| 197 |
chunk = str(chunk).decode('base64') |
|---|
| 198 |
except Exception, e: |
|---|
| 199 |
# Since this is only a chunk, any error is an unfixable error. |
|---|
| 200 |
raise MultiPartParserError("Could not decode base64 data: %r" % e) |
|---|
| 201 |
|
|---|
| 202 |
for i, handler in enumerate(handlers): |
|---|
| 203 |
chunk_length = len(chunk) |
|---|
| 204 |
chunk = handler.receive_data_chunk(chunk, |
|---|
| 205 |
counters[i]) |
|---|
| 206 |
counters[i] += chunk_length |
|---|
| 207 |
if chunk is None: |
|---|
| 208 |
# If the chunk received by the handler is None, then don't continue. |
|---|
| 209 |
break |
|---|
| 210 |
|
|---|
| 211 |
except SkipFile, e: |
|---|
| 212 |
# Just use up the rest of this file... |
|---|
| 213 |
exhaust(field_stream) |
|---|
| 214 |
else: |
|---|
| 215 |
# Handle file upload completions on next iteration. |
|---|
| 216 |
old_field_name = field_name |
|---|
| 217 |
else: |
|---|
| 218 |
# If this is neither a FIELD or a FILE, just exhaust the stream. |
|---|
| 219 |
exhaust(stream) |
|---|
| 220 |
except StopUpload, e: |
|---|
| 221 |
if not e.connection_reset: |
|---|
| 222 |
exhaust(limited_input_data) |
|---|
| 223 |
else: |
|---|
| 224 |
# Make sure that the request data is all fed |
|---|
| 225 |
exhaust(limited_input_data) |
|---|
| 226 |
|
|---|
| 227 |
# Signal that the upload has completed. |
|---|
| 228 |
for handler in handlers: |
|---|
| 229 |
retval = handler.upload_complete() |
|---|
| 230 |
if retval: |
|---|
| 231 |
break |
|---|
| 232 |
|
|---|
| 233 |
return self._post, self._files |
|---|
| 234 |
|
|---|
| 235 |
def handle_file_complete(self, old_field_name, counters): |
|---|
| 236 |
""" |
|---|
| 237 |
Handle all the signalling that takes place when a file is complete. |
|---|
| 238 |
""" |
|---|
| 239 |
for i, handler in enumerate(self._upload_handlers): |
|---|
| 240 |
file_obj = handler.file_complete(counters[i]) |
|---|
| 241 |
if file_obj: |
|---|
| 242 |
# If it returns a file object, then set the files dict. |
|---|
| 243 |
self._files.appendlist(force_unicode(old_field_name, |
|---|
| 244 |
self._encoding, |
|---|
| 245 |
errors='replace'), |
|---|
| 246 |
file_obj) |
|---|
| 247 |
break |
|---|
| 248 |
|
|---|
| 249 |
def IE_sanitize(self, filename): |
|---|
| 250 |
"""Cleanup filename from Internet Explorer full paths.""" |
|---|
| 251 |
return filename and filename[filename.rfind("\\")+1:].strip() |
|---|
| 252 |
|
|---|
| 253 |
class LazyStream(object):
    """
    The LazyStream wrapper allows one to get and "unget" bytes from a stream.

    Given a producer object (an iterator that yields bytestrings), the
    LazyStream object will support iteration, reading, and keeping a "look-back"
    variable in case you need to "unget" some bytes.
    """
    def __init__(self, producer, length=None):
        """
        Every LazyStream must have a producer when instantiated.

        A producer is an object whose ``next()`` method returns a string each
        time it is called and raises StopIteration when it has nothing left.

        ``length``, when given, is the size used by ``read()`` calls that do
        not pass an explicit size.
        """
        self._producer = producer
        self._empty = False
        self._leftover = ''
        self.length = length
        self.position = 0
        self._remaining = length
        self._unget_history = []

    def tell(self):
        """Return the current position in the stream."""
        return self.position

    def read(self, size=None):
        """
        Return up to ``size`` bytes, or fewer if the producer runs dry first.

        Without a size (and without a ``length`` given at construction), the
        rest of the stream is returned in one string.
        """
        def parts():
            if size is not None:
                remaining = size
            else:
                remaining = self._remaining
            # do the whole thing in one shot if no limit was provided.
            if remaining is None:
                yield ''.join(self)
                return

            # otherwise do some bookkeeping to return exactly enough
            # of the stream and stashing any extra content we get from
            # the producer
            while remaining != 0:
                assert remaining > 0, 'remaining bytes to read should never go negative'

                try:
                    chunk = self.next()
                except StopIteration:
                    # End of data: finish with a short read. Catching this
                    # explicitly avoids relying on StopIteration escaping a
                    # generator body.
                    return

                emitting = chunk[:remaining]
                self.unget(chunk[remaining:])
                remaining -= len(emitting)
                yield emitting

        out = ''.join(parts())
        return out

    def next(self):
        """
        Used when the exact number of bytes to read is unimportant.

        This procedure just returns whatever chunk is conveniently returned
        from the iterator instead. Useful to avoid unnecessary bookkeeping if
        performance is an issue.
        """
        if self._leftover:
            output = self._leftover
            self._leftover = ''
        else:
            output = self._producer.next()
            # Fresh data arrived, so the parser is making progress.
            self._unget_history = []
        self.position += len(output)
        return output

    def close(self):
        """
        Used to invalidate/disable this lazy stream.

        Replaces the producer with an empty iterator. Any leftover bytes that
        have already been read will still be reported upon read() and/or next().
        """
        # An iterator rather than a bare list, because next() calls the
        # producer's next() method, which a list does not have.
        self._producer = iter([])

    def __iter__(self):
        return self

    def unget(self, bytes):
        """
        Places bytes back onto the front of the lazy stream.

        Future calls to read() will return those bytes first. The
        stream position and thus tell() will be rewound.
        """
        if not bytes:
            return
        self._update_unget_history(len(bytes))
        self.position -= len(bytes)
        self._leftover = ''.join([bytes, self._leftover])

    def _update_unget_history(self, num_bytes):
        """
        Updates the unget history as a sanity check to see if we've pushed
        back the same number of bytes in one chunk. The 50 most recent
        push-back sizes are kept; if more than 40 of them equal the current
        size, we're most likely in an infinite loop of some sort and
        SuspiciousOperation is raised. This is usually caused by a
        maliciously-malformed MIME request.
        """
        self._unget_history = [num_bytes] + self._unget_history[:49]
        number_equal = self._unget_history.count(num_bytes)

        if number_equal > 40:
            raise SuspiciousOperation(
                "The multipart parser got stuck, which shouldn't happen with"
                " normal uploaded files. Check for malicious upload activity;"
                " if there is none, report this to the Django developers."
            )
|---|
| 363 |
|
|---|
| 364 |
class ChunkIter(object):
    """
    Iterator over a file-like object.

    Each step returns the result of one ``read(chunk_size)`` call on the
    wrapped object; iteration ends on the first empty read or when the
    wrapped object raises InputStreamExhausted.
    """
    def __init__(self, flo, chunk_size=64 * 1024):
        self.flo, self.chunk_size = flo, chunk_size

    def __iter__(self):
        return self

    def next(self):
        try:
            data = self.flo.read(self.chunk_size)
        except InputStreamExhausted:
            # Treated exactly like an empty read.
            data = None
        if not data:
            raise StopIteration()
        return data
|---|
| 386 |
|
|---|
| 387 |
class LimitBytes(object):
    """ Limit bytes for a file object. """
    def __init__(self, fileobject, length):
        """
        :fileobject:
            The underlying object; only its ``read()`` method is used.
        :length:
            The total number of bytes that may be requested from it.
        """
        self._file = fileobject
        self.remaining = length

    def read(self, num_bytes=None):
        """
        Read data from the underlying file.

        At most ``remaining`` bytes are requested: a larger ``num_bytes`` is
        clamped, and ``None`` or a negative value means "everything that is
        still allowed". Once the allowance is used up, this raises an
        InputStreamExhausted error.
        """
        if self.remaining <= 0:
            raise InputStreamExhausted()
        if num_bytes is None or num_bytes < 0:
            # A negative size means "read it all" to file objects; without
            # this branch it would slip past min() below, read beyond the
            # limit and even grow the allowance.
            num_bytes = self.remaining
        else:
            num_bytes = min(num_bytes, self.remaining)
        self.remaining -= num_bytes
        return self._file.read(num_bytes)
|---|
| 407 |
|
|---|
| 408 |
class InterBoundaryIter(object):
    """
    A Producer that yields one LazyStream per boundary-delimited section.
    """
    def __init__(self, stream, boundary):
        self._stream, self._boundary = stream, boundary

    def __iter__(self):
        return self

    def next(self):
        # BoundaryIter raises InputStreamExhausted when the stream has
        # nothing left; that marks the end of this iteration too.
        try:
            section = BoundaryIter(self._stream, self._boundary)
            return LazyStream(section)
        except InputStreamExhausted:
            raise StopIteration()
|---|
| 424 |
|
|---|
| 425 |
class BoundaryIter(object):
    """
    A Producer that is sensitive to boundaries.

    Yields the bytes that precede the next boundary, discards the boundary
    itself, and pushes whatever follows it back onto the stream. Once the
    boundary has been located, further calls to .next() raise StopIteration.
    """

    def __init__(self, stream, boundary):
        self._stream = stream
        self._boundary = boundary
        self._done = False
        # rollback an additional six bytes because the format is like
        # this: CRLF<boundary>[--CRLF]
        self._rollback = len(boundary) + 6

        # Peek at one byte to find out whether anything is left at all,
        # then put it straight back.
        probe = self._stream.read(1)
        if not probe:
            raise InputStreamExhausted()
        self._stream.unget(probe)

        # Try to use mx fast string search if available. Otherwise
        # use Python find. Wrap the latter for consistency.
        try:
            from mx.TextTools import FS
            self._fs = FS(boundary).find
        except ImportError:
            self._fs = lambda data: data.find(boundary)

    def __iter__(self):
        return self

    def next(self):
        if self._done:
            raise StopIteration()

        stream = self._stream
        rollback = self._rollback

        # Gather pieces until more than ``rollback`` bytes are buffered or
        # an empty piece shows up. Running the stream dry marks us done.
        buffered = 0
        pieces = []
        for piece in stream:
            buffered += len(piece)
            pieces.append(piece)
            if buffered > rollback or not piece:
                break
        else:
            self._done = True

        if not pieces:
            raise StopIteration()

        chunk = ''.join(pieces)
        found = self._find_boundary(chunk, len(chunk) < self._rollback)

        if found:
            end, resume = found
            stream.unget(chunk[resume:])
            self._done = True
            return chunk[:end]

        # make sure we dont treat a partial boundary (and
        # its separators) as data
        head = chunk[:-rollback]
        if not head:
            # There's nothing left, we should just return and mark as done.
            self._done = True
            return chunk
        stream.unget(chunk[-rollback:])
        return head

    def _find_boundary(self, data, eof = False):
        """
        Finds a multipart boundary in data.

        Returns None when data holds no boundary. Otherwise returns a tuple
        with the indices of:

         * the end of current encapsulation
         * the start of the next encapsulation

        ``eof`` is accepted but not consulted.
        """
        index = self._fs(data)
        if index < 0:
            return None

        end = index
        resume = index + len(self._boundary)
        # backup over CRLF
        if data[max(0, end - 1)] == '\n':
            end -= 1
        if data[max(0, end - 1)] == '\r':
            end -= 1
        return end, resume
|---|
| 523 |
|
|---|
| 524 |
def exhaust(stream_or_iterable):
    """
    Completely exhausts an iterator or stream.

    An iterable is consumed directly; anything else is read through a
    ChunkIter in 16384-byte chunks until it runs dry.

    Raise a MultiPartParserError if the argument is not a stream or an iterable.
    """
    try:
        iterator = iter(stream_or_iterable)
    except TypeError:
        # Not iterable, so it has to be readable. Check this up front so the
        # documented error is raised instead of an AttributeError from the
        # first read.
        if not hasattr(stream_or_iterable, 'read'):
            raise MultiPartParserError('multipartparser.exhaust() was passed a non-iterable or stream parameter')
        iterator = ChunkIter(stream_or_iterable, 16384)

    for __ in iterator:
        pass
|---|
| 541 |
|
|---|
| 542 |
def parse_boundary_stream(stream, max_header_size): |
|---|
| 543 |
""" |
|---|
| 544 |
Parses one and exactly one stream that encapsulates a boundary. |
|---|
| 545 |
""" |
|---|
| 546 |
# Stream at beginning of header, look for end of header |
|---|
| 547 |
# and parse it if found. The header must fit within one |
|---|
| 548 |
# chunk. |
|---|
| 549 |
chunk = stream.read(max_header_size) |
|---|
| 550 |
|
|---|
| 551 |
# 'find' returns the top of these four bytes, so we'll |
|---|
| 552 |
# need to munch them later to prevent them from polluting |
|---|
| 553 |
# the payload. |
|---|
| 554 |
header_end = chunk.find('\r\n\r\n') |
|---|
| 555 |
|
|---|
| 556 |
def _parse_header(line): |
|---|
| 557 |
main_value_pair, params = parse_header(line) |
|---|
| 558 |
try: |
|---|
| 559 |
name, value = main_value_pair.split(':', 1) |
|---|
| 560 |
except: |
|---|
| 561 |
raise ValueError("Invalid header: %r" % line) |
|---|
| 562 |
return name, (value, params) |
|---|
| 563 |
|
|---|
| 564 |
if header_end == -1: |
|---|
| 565 |
# we find no header, so we just mark this fact and pass on |
|---|
| 566 |
# the stream verbatim |
|---|
| 567 |
stream.unget(chunk) |
|---|
| 568 |
return (RAW, {}, stream) |
|---|
| 569 |
|
|---|
| 570 |
header = chunk[:header_end] |
|---|
| 571 |
|
|---|
| 572 |
# here we place any excess chunk back onto the stream, as |
|---|
| 573 |
# well as throwing away the CRLFCRLF bytes from above. |
|---|
| 574 |
stream.unget(chunk[header_end + 4:]) |
|---|
| 575 |
|
|---|
| 576 |
TYPE = RAW |
|---|
| 577 |
outdict = {} |
|---|
| 578 |
|
|---|
| 579 |
# Eliminate blank lines |
|---|
| 580 |
for line in header.split('\r\n'): |
|---|
| 581 |
# This terminology ("main value" and "dictionary of |
|---|
| 582 |
# parameters") is from the Python docs. |
|---|
| 583 |
try: |
|---|
| 584 |
name, (value, params) = _parse_header(line) |
|---|
| 585 |
except: |
|---|
| 586 |
continue |
|---|
| 587 |
|
|---|
| 588 |
if name == 'content-disposition': |
|---|
| 589 |
TYPE = FIELD |
|---|
| 590 |
if params.get('filename'): |
|---|
| 591 |
&nbs |
|---|