Code

Ticket #15785: limited_stream_lazy_plus_extra_tests.diff

File limited_stream_lazy_plus_extra_tests.diff, 17.3 KB (added by tomchristie, 3 years ago)
Line 
1Index: tests/regressiontests/file_uploads/tests.py
2===================================================================
3--- tests/regressiontests/file_uploads/tests.py (revision 16013)
4+++ tests/regressiontests/file_uploads/tests.py (working copy)
5@@ -8,7 +8,7 @@
6 
7 from django.core.files import temp as tempfile
8 from django.core.files.uploadedfile import SimpleUploadedFile
9-from django.http.multipartparser import MultiPartParser
10+from django.http.multipartparser import MultiPartParser, MultiPartParserError
11 from django.test import TestCase, client
12 from django.utils import simplejson
13 from django.utils import unittest
14@@ -151,6 +151,46 @@
15         got = simplejson.loads(self.client.request(**r).content)
16         self.assertTrue(len(got['file']) < 256, "Got a long file name (%s characters)." % len(got['file']))
17 
18+    def test_truncated_multipart_handled_gracefully(self):
19+        """
20+        If passed an incomplete multipart message, MultiPartParser does not
21+        attempt to read beyond the end of the stream, and will simply handle
22+        the part that can be parsed gracefully.
23+        """
24+        payload = "\r\n".join([
25+            '--' + client.BOUNDARY,
26+            'Content-Disposition: form-data; name="file"; filename="foo.txt"',
27+            'Content-Type: application/octet-stream',
28+            '',
29+            'file contents'
30+            '--' + client.BOUNDARY + '--',
31+            '',
32+        ])
33+        payload = payload[:-10]
34+        r = {
35+            'CONTENT_LENGTH': len(payload),
36+            'CONTENT_TYPE':   client.MULTIPART_CONTENT,
37+            'PATH_INFO':      "/file_uploads/echo/",
38+            'REQUEST_METHOD': 'POST',
39+            'wsgi.input':     client.FakePayload(payload),
40+        }
41+        got = simplejson.loads(self.client.request(**r).content)
42+        self.assertEquals(got, {})
43+
44+    def test_empty_multipart_raises_error(self):
45+        """
46+        If passed an empty multipart message, MultiPartParser will raise
47+        MultiPartParserError.
48+        """
49+        r = {
50+            'CONTENT_LENGTH': 0,
51+            'CONTENT_TYPE':   client.MULTIPART_CONTENT,
52+            'PATH_INFO':      "/file_uploads/echo/",
53+            'REQUEST_METHOD': 'POST',
54+            'wsgi.input':     client.FakePayload(''),
55+        }
56+        self.assertRaises(MultiPartParserError, lambda r: self.client.request(**r), r)
57+
58     def test_custom_upload_handler(self):
59         # A small file (under the 5M quota)
60         smallfile = tempfile.NamedTemporaryFile()
61Index: tests/regressiontests/test_client_regress/views.py
62===================================================================
63--- tests/regressiontests/test_client_regress/views.py  (revision 16013)
64+++ tests/regressiontests/test_client_regress/views.py  (working copy)
65@@ -96,6 +96,14 @@
66     "A view that is requested with GET and accesses request.raw_post_data. Refs #14753."
67     return HttpResponse(request.raw_post_data)
68 
69+def read_all(request):
70+    "A view that accesses request.read()."
71+    return HttpResponse(request.read())
72+
73+def read_buffer(request):
74+    "A view that accesses request.read(LARGE_BUFFER)."
75+    return HttpResponse(request.read(99999))
76+
77 def request_context_view(request):
78     # Special attribute that won't be present on a plain HttpRequest
79     request.special_path = request.path
80Index: tests/regressiontests/test_client_regress/models.py
81===================================================================
82--- tests/regressiontests/test_client_regress/models.py (revision 16013)
83+++ tests/regressiontests/test_client_regress/models.py (working copy)
84@@ -900,11 +900,41 @@
85         response = self.client.get("/test_client_regress/request_methods/")
86         self.assertEqual(response.template, None)
87 
88-class RawPostDataTest(TestCase):
89-    "Access to request.raw_post_data from the test client."
90-    def test_raw_post_data(self):
91-        # Refs #14753
92-        try:
93-            response = self.client.get("/test_client_regress/raw_post_data/")
94-        except AssertionError:
95-            self.fail("Accessing request.raw_post_data from a view fetched with GET by the test client shouldn't fail.")
96+
97+class ReadLimitedStreamTest(TestCase):
98+    """
99+    Tests that ensure that HttpRequest.raw_post_data, HttpRequest.read() and
100+    HttpRequest.read(BUFFER) have proper LimitedStream behaviour.
101+
102+    Refs #14753, #15785
103+    """
104+    def test_raw_post_data_from_empty_request(self):
105+        """HttpRequest.raw_post_data on a test client GET request should return
106+        the empty string."""
107+        self.assertEquals(self.client.get("/test_client_regress/raw_post_data/").content, '')
108+
109+    def test_read_from_empty_request(self):
110+        """HttpRequest.read() on a test client GET request should return the
111+        empty string."""
112+        self.assertEquals(self.client.get("/test_client_regress/read_all/").content, '')
113+
114+    def test_read_numbytes_from_empty_request(self):
115+        """HttpRequest.read(LARGE_BUFFER) on a test client GET request should
116+        return the empty string."""
117+        self.assertEquals(self.client.get("/test_client_regress/read_buffer/").content, '')
118+
119+    def test_read_from_nonempty_request(self):
120+        """HttpRequest.read() on a test client PUT request with some payload
121+        should return that payload."""
122+        payload = 'foobar'
123+        self.assertEquals(self.client.put("/test_client_regress/read_all/",
124+                                          data=payload,
125+                                          content_type='text/plain').content, payload)
126+
127+    def test_read_numbytes_from_nonempty_request(self):
128+        """HttpRequest.read(LARGE_BUFFER) on a test client PUT request with
129+        some payload should return that payload."""
130+        payload = 'foobar'
131+        self.assertEquals(self.client.put("/test_client_regress/read_buffer/",
132+                                          data=payload,
133+                                          content_type='text/plain').content, payload)
134Index: tests/regressiontests/test_client_regress/urls.py
135===================================================================
136--- tests/regressiontests/test_client_regress/urls.py   (revision 16013)
137+++ tests/regressiontests/test_client_regress/urls.py   (working copy)
138@@ -27,5 +27,7 @@
139     (r'^check_headers/$', views.check_headers),
140     (r'^check_headers_redirect/$', RedirectView.as_view(url='/test_client_regress/check_headers/')),
141     (r'^raw_post_data/$', views.raw_post_data),
142+    (r'^read_all/$', views.read_all),
143+    (r'^read_buffer/$', views.read_buffer),
144     (r'^request_context_view/$', views.request_context_view),
145 )
146Index: tests/regressiontests/requests/tests.py
147===================================================================
148--- tests/regressiontests/requests/tests.py     (revision 16013)
149+++ tests/regressiontests/requests/tests.py     (working copy)
150@@ -156,7 +156,10 @@
151         self.assertEqual(stream.read(), '')
152 
153     def test_stream(self):
154-        request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
155+        payload = 'name=value'
156+        request = WSGIRequest({'REQUEST_METHOD': 'POST',
157+                               'CONTENT_LENGTH': len(payload),
158+                               'wsgi.input': StringIO(payload)})
159         self.assertEqual(request.read(), 'name=value')
160 
161     def test_read_after_value(self):
162@@ -164,7 +167,10 @@
163         Reading from request is allowed after accessing request contents as
164         POST or raw_post_data.
165         """
166-        request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
167+        payload = 'name=value'
168+        request = WSGIRequest({'REQUEST_METHOD': 'POST',
169+                               'CONTENT_LENGTH': len(payload),
170+                               'wsgi.input': StringIO(payload)})
171         self.assertEqual(request.POST, {u'name': [u'value']})
172         self.assertEqual(request.raw_post_data, 'name=value')
173         self.assertEqual(request.read(), 'name=value')
174@@ -174,7 +180,10 @@
175         Construction of POST or raw_post_data is not allowed after reading
176         from request.
177         """
178-        request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
179+        payload = 'name=value'
180+        request = WSGIRequest({'REQUEST_METHOD': 'POST',
181+                               'CONTENT_LENGTH': len(payload),
182+                               'wsgi.input': StringIO(payload)})
183         self.assertEqual(request.read(2), 'na')
184         self.assertRaises(Exception, lambda: request.raw_post_data)
185         self.assertEqual(request.POST, {})
186@@ -201,14 +210,20 @@
187         self.assertRaises(Exception, lambda: request.raw_post_data)
188 
189     def test_read_by_lines(self):
190-        request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
191+        payload = 'name=value'
192+        request = WSGIRequest({'REQUEST_METHOD': 'POST',
193+                               'CONTENT_LENGTH': len(payload),
194+                               'wsgi.input': StringIO(payload)})
195         self.assertEqual(list(request), ['name=value'])
196 
197     def test_POST_after_raw_post_data_read(self):
198         """
199         POST should be populated even if raw_post_data is read first
200         """
201-        request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
202+        payload = 'name=value'
203+        request = WSGIRequest({'REQUEST_METHOD': 'POST',
204+                               'CONTENT_LENGTH': len(payload),
205+                               'wsgi.input': StringIO(payload)})
206         raw_data = request.raw_post_data
207         self.assertEqual(request.POST, {u'name': [u'value']})
208 
209@@ -217,7 +232,10 @@
210         POST should be populated even if raw_post_data is read first, and then
211         the stream is read second.
212         """
213-        request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
214+        payload = 'name=value'
215+        request = WSGIRequest({'REQUEST_METHOD': 'POST',
216+                               'CONTENT_LENGTH': len(payload),
217+                               'wsgi.input': StringIO(payload)})
218         raw_data = request.raw_post_data
219         self.assertEqual(request.read(1), u'n')
220         self.assertEqual(request.POST, {u'name': [u'value']})
221Index: django/http/multipartparser.py
222===================================================================
223--- django/http/multipartparser.py      (revision 16013)
224+++ django/http/multipartparser.py      (working copy)
225@@ -33,7 +33,7 @@
226     A rfc2388 multipart/form-data parser.
227 
228     ``MultiValueDict.parse()`` reads the input stream in ``chunk_size`` chunks
229-    and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``. If
230+    and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``.
231     """
232     def __init__(self, META, input_data, upload_handlers, encoding=None):
233         """
234@@ -65,14 +65,11 @@
235             raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary)
236 
237 
238-        #
239         # Content-Length should contain the length of the body we are about
240         # to receive.
241-        #
242         try:
243             content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH',0)))
244         except (ValueError, TypeError):
245-            # For now set it to 0; we'll try again later on down.
246             content_length = 0
247 
248         if content_length <= 0:
249@@ -105,12 +102,10 @@
250         encoding = self._encoding
251         handlers = self._upload_handlers
252 
253-        limited_input_data = LimitBytes(self._input_data, self._content_length)
254-
255         # See if the handler will want to take care of the parsing.
256         # This allows overriding everything if somebody wants it.
257         for handler in handlers:
258-            result = handler.handle_raw_input(limited_input_data,
259+            result = handler.handle_raw_input(self._input_data,
260                                               self._meta,
261                                               self._content_length,
262                                               self._boundary,
263@@ -123,7 +118,7 @@
264         self._files = MultiValueDict()
265 
266         # Instantiate the parser and stream:
267-        stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size))
268+        stream = LazyStream(ChunkIter(self._input_data, self._chunk_size))
269 
270         # Whether or not to signal a file-completion at the beginning of the loop.
271         old_field_name = None
272@@ -218,10 +213,10 @@
273                     exhaust(stream)
274         except StopUpload, e:
275             if not e.connection_reset:
276-                exhaust(limited_input_data)
277+                exhaust(self._input_data)
278         else:
279             # Make sure that the request data is all fed
280-            exhaust(limited_input_data)
281+            exhaust(self._input_data)
282 
283         # Signal that the upload has completed.
284         for handler in handlers:
285@@ -383,27 +378,6 @@
286     def __iter__(self):
287         return self
288 
289-class LimitBytes(object):
290-    """ Limit bytes for a file object. """
291-    def __init__(self, fileobject, length):
292-        self._file = fileobject
293-        self.remaining = length
294-
295-    def read(self, num_bytes=None):
296-        """
297-        Read data from the underlying file.
298-        If you ask for too much or there isn't anything left,
299-        this will raise an InputStreamExhausted error.
300-        """
301-        if self.remaining <= 0:
302-            raise InputStreamExhausted()
303-        if num_bytes is None:
304-            num_bytes = self.remaining
305-        else:
306-            num_bytes = min(num_bytes, self.remaining)
307-        self.remaining -= num_bytes
308-        return self._file.read(num_bytes)
309-
310 class InterBoundaryIter(object):
311     """
312     A Producer that will iterate over boundaries.
313Index: django/http/__init__.py
314===================================================================
315--- django/http/__init__.py     (revision 16013)
316+++ django/http/__init__.py     (working copy)
317@@ -237,17 +237,7 @@
318         if not hasattr(self, '_raw_post_data'):
319             if self._read_started:
320                 raise Exception("You cannot access raw_post_data after reading from request's data stream")
321-            try:
322-                content_length = int(self.META.get('CONTENT_LENGTH', 0))
323-            except (ValueError, TypeError):
324-                # If CONTENT_LENGTH was empty string or not an integer, don't
325-                # error out. We've also seen None passed in here (against all
326-                # specs, but see ticket #8259), so we handle TypeError as well.
327-                content_length = 0
328-            if content_length:
329-                self._raw_post_data = self.read(content_length)
330-            else:
331-                self._raw_post_data = self.read()
332+            self._raw_post_data = self.read()
333             self._stream = StringIO(self._raw_post_data)
334         return self._raw_post_data
335     raw_post_data = property(_get_raw_post_data)
336Index: django/core/handlers/wsgi.py
337===================================================================
338--- django/core/handlers/wsgi.py        (revision 16013)
339+++ django/core/handlers/wsgi.py        (working copy)
340@@ -135,26 +135,6 @@
341         self.META['SCRIPT_NAME'] = script_name
342         self.method = environ['REQUEST_METHOD'].upper()
343         self._post_parse_error = False
344-        if type(socket._fileobject) is type and isinstance(self.environ['wsgi.input'], socket._fileobject):
345-            # Under development server 'wsgi.input' is an instance of
346-            # socket._fileobject which hangs indefinitely on reading bytes past
347-            # available count. To prevent this it's wrapped in LimitedStream
348-            # that doesn't read past Content-Length bytes.
349-            #
350-            # This is not done for other kinds of inputs (like flup's FastCGI
351-            # streams) beacuse they don't suffer from this problem and we can
352-            # avoid using another wrapper with its own .read and .readline
353-            # implementation.
354-            #
355-            # The type check is done because for some reason, AppEngine
356-            # implements _fileobject as a function, not a class.
357-            try:
358-                content_length = int(self.environ.get('CONTENT_LENGTH', 0))
359-            except (ValueError, TypeError):
360-                content_length = 0
361-            self._stream = LimitedStream(self.environ['wsgi.input'], content_length)
362-        else:
363-            self._stream = self.environ['wsgi.input']
364         self._read_started = False
365 
366     def __repr__(self):
367@@ -191,6 +171,20 @@
368         return 'wsgi.url_scheme' in self.environ \
369             and self.environ['wsgi.url_scheme'] == 'https'
370 
371+    def _get_stream(self):
372+        if not hasattr(self, '_limited_stream'):
373+            try:
374+                content_length = int(self.environ.get('CONTENT_LENGTH', 0))
375+            except (ValueError, TypeError):
376+                content_length = 0
377+
378+            self._limited_stream = LimitedStream(self.environ['wsgi.input'], content_length)
379+
380+        return self._limited_stream
381+
382+    def _set_stream(self, stream):
383+        self._limited_stream = stream
384+
385     def _get_request(self):
386         if not hasattr(self, '_request'):
387             self._request = datastructures.MergeDict(self.POST, self.GET)
388@@ -226,6 +220,7 @@
389             self._load_post_and_files()
390         return self._files
391 
392+    _stream = property(_get_stream, _set_stream)
393     GET = property(_get_get, _set_get)
394     POST = property(_get_post, _set_post)
395     COOKIES = property(_get_cookies, _set_cookies)