Code

Ticket #19101: 19101-1.diff

File 19101-1.diff, 3.4 KB (added by claudep, 18 months ago)

Fixed non-ASCII form data decoding with Python 3

Line 
1diff --git a/django/core/handlers/wsgi.py b/django/core/handlers/wsgi.py
2index 7d2ee44..48b3b7f 100644
3--- a/django/core/handlers/wsgi.py
4+++ b/django/core/handlers/wsgi.py
5@@ -1,5 +1,6 @@
6 from __future__ import unicode_literals
7 
8+import codecs
9 import logging
10 import sys
11 from io import BytesIO
12@@ -144,6 +145,14 @@ class WSGIRequest(http.HttpRequest):
13         self.META['PATH_INFO'] = path_info
14         self.META['SCRIPT_NAME'] = script_name
15         self.method = environ['REQUEST_METHOD'].upper()
16+        if 'charset=' in self.META.get('CONTENT_TYPE', ''):
17+            charset = self.META['CONTENT_TYPE'].split('charset=')[-1]
18+            try:
19+                codecs.lookup(charset)
20+            except LookupError:
21+                pass
22+            else:
23+                self.encoding = charset
24         self._post_parse_error = False
25         try:
26             content_length = int(self.environ.get('CONTENT_LENGTH'))
27diff --git a/django/http/__init__.py b/django/http/__init__.py
28index b67c182..9999185 100644
29--- a/django/http/__init__.py
30+++ b/django/http/__init__.py
31@@ -342,7 +342,7 @@ class HttpRequest(object):
32                 self._mark_post_parse_error()
33                 raise
34         elif self.META.get('CONTENT_TYPE', '').startswith('application/x-www-form-urlencoded'):
35-            self._post, self._files = QueryDict(self.body, encoding=self._encoding), MultiValueDict()
36+            self._post, self._files = QueryDict(force_str(self.body), encoding=self._encoding), MultiValueDict()
37         else:
38             self._post, self._files = QueryDict('', encoding=self._encoding), MultiValueDict()
39 
40diff --git a/tests/regressiontests/requests/tests.py b/tests/regressiontests/requests/tests.py
41index 2ec478a..16ceea7 100644
42--- a/tests/regressiontests/requests/tests.py
43+++ b/tests/regressiontests/requests/tests.py
44@@ -1,3 +1,4 @@
45+# -*- encoding: utf-8 -*-
46 from __future__ import unicode_literals
47 
48 import time
49@@ -12,7 +13,7 @@ from django.http import HttpRequest, HttpResponse, parse_cookie, build_request_r
50 from django.test.client import FakePayload
51 from django.test.utils import str_prefix
52 from django.utils import unittest
53-from django.utils.http import cookie_date
54+from django.utils.http import cookie_date, urlencode
55 from django.utils.timezone import utc
56 
57 
58@@ -364,6 +365,27 @@ class RequestsTests(unittest.TestCase):
59         self.assertRaises(Exception, lambda: request.body)
60         self.assertEqual(request.POST, {})
61 
62+    def test_non_ascii_POST(self):
63+        payload = FakePayload(urlencode({'key': 'España'}))
64+        request = WSGIRequest({
65+            'REQUEST_METHOD': 'POST',
66+            'CONTENT_LENGTH': len(payload),
67+            'CONTENT_TYPE': 'application/x-www-form-urlencoded',
68+            'wsgi.input': payload,
69+        })
70+        self.assertEqual(request.POST, {'key': ['España']})
71+
72+        # latin-1 encoding
73+        from django.utils.http import urllib_parse
74+        payload = FakePayload(urllib_parse.urlencode({'key': 'España'.encode('latin-1')}))
75+        request = WSGIRequest({
76+            'REQUEST_METHOD': 'POST',
77+            'CONTENT_LENGTH': len(payload),
78+            'CONTENT_TYPE': 'application/x-www-form-urlencoded; charset=iso-8859-1',
79+            'wsgi.input': payload,
80+        })
81+        self.assertEqual(request.POST, {'key': ['España']})
82+
83     def test_body_after_POST_multipart(self):
84         """
85         Reading body after parsing multipart is not allowed