Code

Ticket #15785: limited_stream.diff

File limited_stream.diff, 15.7 KB (added by tomchristie, 3 years ago)
Line 
1Index: tests/regressiontests/file_uploads/tests.py
2===================================================================
3--- tests/regressiontests/file_uploads/tests.py (revision 16013)
4+++ tests/regressiontests/file_uploads/tests.py (working copy)
5@@ -8,7 +8,7 @@
6 
7 from django.core.files import temp as tempfile
8 from django.core.files.uploadedfile import SimpleUploadedFile
9-from django.http.multipartparser import MultiPartParser
10+from django.http.multipartparser import MultiPartParser, MultiPartParserError
11 from django.test import TestCase, client
12 from django.utils import simplejson
13 from django.utils import unittest
14@@ -151,6 +151,46 @@
15         got = simplejson.loads(self.client.request(**r).content)
16         self.assertTrue(len(got['file']) < 256, "Got a long file name (%s characters)." % len(got['file']))
17 
18+    def test_truncated_multipart_handled_gracefully(self):
19+        """
20+        If passed an incomplete multipart message, MultiPartParser does not
21+        attempt to read beyond the end of the stream, and will simply handle
22+        the part that can be parsed gracefully.
23+        """
24+        payload = "\r\n".join([
25+            '--' + client.BOUNDARY,
26+            'Content-Disposition: form-data; name="file"; filename="foo.txt"',
27+            'Content-Type: application/octet-stream',
28+            '',
29+            'file contents'
30+            '--' + client.BOUNDARY + '--',
31+            '',
32+        ])
33+        payload = payload[:-10]
34+        r = {
35+            'CONTENT_LENGTH': len(payload),
36+            'CONTENT_TYPE':   client.MULTIPART_CONTENT,
37+            'PATH_INFO':      "/file_uploads/echo/",
38+            'REQUEST_METHOD': 'POST',
39+            'wsgi.input':     client.FakePayload(payload),
40+        }
41+        got = simplejson.loads(self.client.request(**r).content)
42+        self.assertEquals(got, {})
43+
44+    def test_empty_multipart_raises_error(self):
45+        """
46+        If passed an empty multipart message, MultiPartParser will raise
47+        MultiPartParserError.
48+        """
49+        r = {
50+            'CONTENT_LENGTH': 0,
51+            'CONTENT_TYPE':   client.MULTIPART_CONTENT,
52+            'PATH_INFO':      "/file_uploads/echo/",
53+            'REQUEST_METHOD': 'POST',
54+            'wsgi.input':     client.FakePayload(''),
55+        }
56+        self.assertRaises(MultiPartParserError, lambda r: self.client.request(**r), r)
57+
58     def test_custom_upload_handler(self):
59         # A small file (under the 5M quota)
60         smallfile = tempfile.NamedTemporaryFile()
61Index: tests/regressiontests/test_client_regress/views.py
62===================================================================
63--- tests/regressiontests/test_client_regress/views.py  (revision 16013)
64+++ tests/regressiontests/test_client_regress/views.py  (working copy)
65@@ -96,6 +96,10 @@
66     "A view that is requested with GET and accesses request.raw_post_data. Refs #14753."
67     return HttpResponse(request.raw_post_data)
68 
69+def read_limited_stream(request):
70+    "A view that is requested with PUT and accesses request.read(LARGE_BUFFER)."
71+    return HttpResponse(request.read(99999))
72+
73 def request_context_view(request):
74     # Special attribute that won't be present on a plain HttpRequest
75     request.special_path = request.path
76Index: tests/regressiontests/test_client_regress/models.py
77===================================================================
78--- tests/regressiontests/test_client_regress/models.py (revision 16013)
79+++ tests/regressiontests/test_client_regress/models.py (working copy)
80@@ -908,3 +908,15 @@
81             response = self.client.get("/test_client_regress/raw_post_data/")
82         except AssertionError:
83             self.fail("Accessing request.raw_post_data from a view fetched with GET by the test client shouldn't fail.")
84+
85+class ReadLimitedStreamTest(TestCase):
86+    """
87+    Attempting to read beyond META["CONTENT_LENGTH"] from the test client
88+    should simply return the full content of the request.
89+    """
90+    def test_read_limited_stream(self):
91+        try:
92+            response = self.client.put("/test_client_regress/read_limited_stream/", data={'foo':'whiz'})
93+        except AssertionError:
94+            self.fail("Reading more than META['CONTENT_LENGTH'] of data from a view fetched with PUT by the test client " +
95+                      "shouldn't fail, it should simply return the full content of the request.")       
96Index: tests/regressiontests/test_client_regress/urls.py
97===================================================================
98--- tests/regressiontests/test_client_regress/urls.py   (revision 16013)
99+++ tests/regressiontests/test_client_regress/urls.py   (working copy)
100@@ -27,5 +27,6 @@
101     (r'^check_headers/$', views.check_headers),
102     (r'^check_headers_redirect/$', RedirectView.as_view(url='/test_client_regress/check_headers/')),
103     (r'^raw_post_data/$', views.raw_post_data),
104+    (r'^read_limited_stream/$', views.read_limited_stream),
105     (r'^request_context_view/$', views.request_context_view),
106 )
107Index: tests/regressiontests/requests/tests.py
108===================================================================
109--- tests/regressiontests/requests/tests.py     (revision 16013)
110+++ tests/regressiontests/requests/tests.py     (working copy)
111@@ -156,7 +156,10 @@
112         self.assertEqual(stream.read(), '')
113 
114     def test_stream(self):
115-        request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
116+        payload = 'name=value'
117+        request = WSGIRequest({'REQUEST_METHOD': 'POST',
118+                               'CONTENT_LENGTH': len(payload),
119+                               'wsgi.input': StringIO(payload)})
120         self.assertEqual(request.read(), 'name=value')
121 
122     def test_read_after_value(self):
123@@ -164,7 +167,10 @@
124         Reading from request is allowed after accessing request contents as
125         POST or raw_post_data.
126         """
127-        request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
128+        payload = 'name=value'
129+        request = WSGIRequest({'REQUEST_METHOD': 'POST',
130+                               'CONTENT_LENGTH': len(payload),
131+                               'wsgi.input': StringIO(payload)})
132         self.assertEqual(request.POST, {u'name': [u'value']})
133         self.assertEqual(request.raw_post_data, 'name=value')
134         self.assertEqual(request.read(), 'name=value')
135@@ -174,7 +180,10 @@
136         Construction of POST or raw_post_data is not allowed after reading
137         from request.
138         """
139-        request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
140+        payload = 'name=value'
141+        request = WSGIRequest({'REQUEST_METHOD': 'POST',
142+                               'CONTENT_LENGTH': len(payload),
143+                               'wsgi.input': StringIO(payload)})
144         self.assertEqual(request.read(2), 'na')
145         self.assertRaises(Exception, lambda: request.raw_post_data)
146         self.assertEqual(request.POST, {})
147@@ -201,14 +210,20 @@
148         self.assertRaises(Exception, lambda: request.raw_post_data)
149 
150     def test_read_by_lines(self):
151-        request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
152+        payload = 'name=value'
153+        request = WSGIRequest({'REQUEST_METHOD': 'POST',
154+                               'CONTENT_LENGTH': len(payload),
155+                               'wsgi.input': StringIO(payload)})
156         self.assertEqual(list(request), ['name=value'])
157 
158     def test_POST_after_raw_post_data_read(self):
159         """
160         POST should be populated even if raw_post_data is read first
161         """
162-        request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
163+        payload = 'name=value'
164+        request = WSGIRequest({'REQUEST_METHOD': 'POST',
165+                               'CONTENT_LENGTH': len(payload),
166+                               'wsgi.input': StringIO(payload)})
167         raw_data = request.raw_post_data
168         self.assertEqual(request.POST, {u'name': [u'value']})
169 
170@@ -217,7 +232,10 @@
171         POST should be populated even if raw_post_data is read first, and then
172         the stream is read second.
173         """
174-        request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
175+        payload = 'name=value'
176+        request = WSGIRequest({'REQUEST_METHOD': 'POST',
177+                               'CONTENT_LENGTH': len(payload),
178+                               'wsgi.input': StringIO(payload)})
179         raw_data = request.raw_post_data
180         self.assertEqual(request.read(1), u'n')
181         self.assertEqual(request.POST, {u'name': [u'value']})
182Index: django/http/multipartparser.py
183===================================================================
184--- django/http/multipartparser.py      (revision 16013)
185+++ django/http/multipartparser.py      (working copy)
186@@ -33,7 +33,7 @@
187     A rfc2388 multipart/form-data parser.
188 
189     ``MultiValueDict.parse()`` reads the input stream in ``chunk_size`` chunks
190-    and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``. If
191+    and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``.
192     """
193     def __init__(self, META, input_data, upload_handlers, encoding=None):
194         """
195@@ -65,14 +65,11 @@
196             raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary)
197 
198 
199-        #
200         # Content-Length should contain the length of the body we are about
201         # to receive.
202-        #
203         try:
204             content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH',0)))
205         except (ValueError, TypeError):
206-            # For now set it to 0; we'll try again later on down.
207             content_length = 0
208 
209         if content_length <= 0:
210@@ -105,12 +102,10 @@
211         encoding = self._encoding
212         handlers = self._upload_handlers
213 
214-        limited_input_data = LimitBytes(self._input_data, self._content_length)
215-
216         # See if the handler will want to take care of the parsing.
217         # This allows overriding everything if somebody wants it.
218         for handler in handlers:
219-            result = handler.handle_raw_input(limited_input_data,
220+            result = handler.handle_raw_input(self._input_data,
221                                               self._meta,
222                                               self._content_length,
223                                               self._boundary,
224@@ -123,7 +118,7 @@
225         self._files = MultiValueDict()
226 
227         # Instantiate the parser and stream:
228-        stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size))
229+        stream = LazyStream(ChunkIter(self._input_data, self._chunk_size))
230 
231         # Whether or not to signal a file-completion at the beginning of the loop.
232         old_field_name = None
233@@ -218,10 +213,10 @@
234                     exhaust(stream)
235         except StopUpload, e:
236             if not e.connection_reset:
237-                exhaust(limited_input_data)
238+                exhaust(self._input_data)
239         else:
240             # Make sure that the request data is all fed
241-            exhaust(limited_input_data)
242+            exhaust(self._input_data)
243 
244         # Signal that the upload has completed.
245         for handler in handlers:
246@@ -383,27 +378,6 @@
247     def __iter__(self):
248         return self
249 
250-class LimitBytes(object):
251-    """ Limit bytes for a file object. """
252-    def __init__(self, fileobject, length):
253-        self._file = fileobject
254-        self.remaining = length
255-
256-    def read(self, num_bytes=None):
257-        """
258-        Read data from the underlying file.
259-        If you ask for too much or there isn't anything left,
260-        this will raise an InputStreamExhausted error.
261-        """
262-        if self.remaining <= 0:
263-            raise InputStreamExhausted()
264-        if num_bytes is None:
265-            num_bytes = self.remaining
266-        else:
267-            num_bytes = min(num_bytes, self.remaining)
268-        self.remaining -= num_bytes
269-        return self._file.read(num_bytes)
270-
271 class InterBoundaryIter(object):
272     """
273     A Producer that will iterate over boundaries.
274Index: django/http/__init__.py
275===================================================================
276--- django/http/__init__.py     (revision 16013)
277+++ django/http/__init__.py     (working copy)
278@@ -237,17 +237,7 @@
279         if not hasattr(self, '_raw_post_data'):
280             if self._read_started:
281                 raise Exception("You cannot access raw_post_data after reading from request's data stream")
282-            try:
283-                content_length = int(self.META.get('CONTENT_LENGTH', 0))
284-            except (ValueError, TypeError):
285-                # If CONTENT_LENGTH was empty string or not an integer, don't
286-                # error out. We've also seen None passed in here (against all
287-                # specs, but see ticket #8259), so we handle TypeError as well.
288-                content_length = 0
289-            if content_length:
290-                self._raw_post_data = self.read(content_length)
291-            else:
292-                self._raw_post_data = self.read()
293+            self._raw_post_data = self.read()
294             self._stream = StringIO(self._raw_post_data)
295         return self._raw_post_data
296     raw_post_data = property(_get_raw_post_data)
297Index: django/core/handlers/wsgi.py
298===================================================================
299--- django/core/handlers/wsgi.py        (revision 16013)
300+++ django/core/handlers/wsgi.py        (working copy)
301@@ -135,26 +135,6 @@
302         self.META['SCRIPT_NAME'] = script_name
303         self.method = environ['REQUEST_METHOD'].upper()
304         self._post_parse_error = False
305-        if type(socket._fileobject) is type and isinstance(self.environ['wsgi.input'], socket._fileobject):
306-            # Under development server 'wsgi.input' is an instance of
307-            # socket._fileobject which hangs indefinitely on reading bytes past
308-            # available count. To prevent this it's wrapped in LimitedStream
309-            # that doesn't read past Content-Length bytes.
310-            #
311-            # This is not done for other kinds of inputs (like flup's FastCGI
312-            # streams) beacuse they don't suffer from this problem and we can
313-            # avoid using another wrapper with its own .read and .readline
314-            # implementation.
315-            #
316-            # The type check is done because for some reason, AppEngine
317-            # implements _fileobject as a function, not a class.
318-            try:
319-                content_length = int(self.environ.get('CONTENT_LENGTH', 0))
320-            except (ValueError, TypeError):
321-                content_length = 0
322-            self._stream = LimitedStream(self.environ['wsgi.input'], content_length)
323-        else:
324-            self._stream = self.environ['wsgi.input']
325         self._read_started = False
326 
327     def __repr__(self):
328@@ -191,6 +171,20 @@
329         return 'wsgi.url_scheme' in self.environ \
330             and self.environ['wsgi.url_scheme'] == 'https'
331 
332+    def _get_stream(self):
333+        if not hasattr(self, '_limited_stream'):
334+            try:
335+                content_length = int(self.environ.get('CONTENT_LENGTH', 0))
336+            except (ValueError, TypeError):
337+                content_length = 0
338+
339+            self._limited_stream = LimitedStream(self.environ['wsgi.input'], content_length)
340+
341+        return self._limited_stream
342+
343+    def _set_stream(self, stream):
344+        self._limited_stream = stream
345+
346     def _get_request(self):
347         if not hasattr(self, '_request'):
348             self._request = datastructures.MergeDict(self.POST, self.GET)
349@@ -226,6 +220,7 @@
350             self._load_post_and_files()
351         return self._files
352 
353+    _stream = property(_get_stream, _set_stream)
354     GET = property(_get_get, _set_get)
355     POST = property(_get_post, _set_post)
356     COOKIES = property(_get_cookies, _set_cookies)