Code

Ticket #1484: 3013-streaming-file_rfc822.diff

File 3013-streaming-file_rfc822.diff, 13.1 KB (added by [530], 8 years ago)

Fixed readline in rfc822

Line 
1Index: django/http/__init__.py
2===================================================================
3--- django/http/__init__.py     (revision 3013)
4+++ django/http/__init__.py     (working copy)
5@@ -2,7 +2,11 @@
6 from pprint import pformat
7 from urllib import urlencode
8 from django.utils.datastructures import MultiValueDict
9+import cgi
10+import rfc822
11 
12+from StringIO import StringIO
13+
14 try:
15     # The mod_python version is more efficient, so try importing it first.
16     from mod_python.util import parse_qsl
17@@ -35,35 +39,191 @@
18     def get_full_path(self):
19         return ''
20 
21-def parse_file_upload(header_dict, post_data):
22+class Message(rfc822.Message):
23+
24+    def readheaders(self):
25+        """Read header lines; overrides the base readheaders so each readline call is capped at 64000."""
26+
27+        self.dict = {}
28+        self.unixfrom = ''
29+        self.headers = list = []  # NOTE(review): the local name 'list' shadows the builtin
30+        self.status = ''
31+        headerseen = ""
32+        firstline = 1
33+        startofline = unread = tell = None
34+        if hasattr(self.fp, 'unread'):  # prefer the stream's own pushback method when it has one
35+            unread = self.fp.unread
36+        elif self.seekable:
37+            tell = self.fp.tell
38+        while 1:
39+            if tell:
40+                try:
41+                    startofline = tell()  # remember where this line starts so it can be un-read
42+                except IOError:
43+                    startofline = tell = None
44+                    self.seekable = 0  # tell() failed: stop treating the stream as seekable
45+            line = self.fp.readline(64000)  # size-limited read instead of an unbounded readline()
46+            if not line:
47+                self.status = 'EOF in headers'
48+                break
49+            # Skip unix From name time lines
50+            if firstline and line.startswith('From '):
51+                self.unixfrom = self.unixfrom + line
52+                continue
53+            firstline = 0
54+            if headerseen and line[0] in ' \t':
55+                # It's a continuation line.
56+                list.append(line)
57+                x = (self.dict[headerseen] + "\n " + line.strip())
58+                self.dict[headerseen] = x.strip()
59+                continue
60+            elif self.iscomment(line):
61+                # It's a comment.  Ignore it.
62+                continue
63+            elif self.islast(line):
64+                # Note! No pushback here!  The delimiter line gets eaten.
65+                break
66+            headerseen = self.isheader(line)
67+            if headerseen:
68+                # It's a legal header line, save it.
69+                list.append(line)
70+                self.dict[headerseen] = line[len(headerseen)+1:].strip()
71+                continue
72+            else:
73+                # It's not a header line; throw it back and stop here.
74+                if not self.dict:
75+                    self.status = 'No headers'
76+                else:
77+                    self.status = 'Non-header line where header expected'
78+                # Try to undo the read.
79+                if unread:
80+                    unread(line)
81+                elif tell:
82+                    self.fp.seek(startofline)
83+                else:
84+                    self.status = self.status + '; bad seek'
85+                break
86+
87+
88+class FileDict(dict):
89+    "Keeps uploaded file as a file-like object and reads its content on demand"
90+    def __getitem__(self, name):
91+        if name=='content' and not 'content' in self:  # 'content' is filled in lazily on first access
92+            self['file'].seek(0, 2)  # seek to the end to measure the size
93+            size = self['file'].tell()
94+            self['file'].seek(0, 0)  # rewind before reading everything
95+            self['content']=self['file'].read(size)
96+        return dict.__getitem__(self, name)
97+   
98+    def get_size(self):
99+        self['file'].seek(0, 2)  # NOTE(review): leaves the file positioned at EOF; it is not rewound here
100+        size = self['file'].tell()
101+        return size
102+
103+    def __repr__(self):
104+        return '<FileDict>'
105+
106+    def __deepcopy__(self, memo={}):  # NOTE(review): mutable default; direct calls without memo share one dict
107+        self['content'] # make sure file content is loaded
108+        import copy
109+        result = self.__class__()
110+        memo[id(self)] = result  # register the copy before recursing into the values
111+        for key, value in dict.items(self):  # NOTE(review): also deep-copies the 'file' entry; confirm file objects support that
112+            dict.__setitem__(result, copy.deepcopy(key, memo), copy.deepcopy(value, memo))
113+        return result
114+
115+class FieldStorage(cgi.FieldStorage):
116+    "cgi.FieldStorage with ability to store files on disk"
117+    def make_file(self, binary=None):
118+   
119+        import tempfile
120+        tmpfile = tempfile.NamedTemporaryFile("w+b")
121+        self.tmp_name = tmpfile.name  # remember the temporary file's path; set only when make_file() runs
122+        return tmpfile
123+
124+    def read_multi(self, environ, keep_blank_values, strict_parsing):
125+        """Internal: read a part that is itself multipart."""
126+        ib = self.innerboundary
127+        if not cgi.valid_boundary(ib):
128+            raise ValueError, 'Invalid boundary in multipart form: %r' % (ib,)
129+        self.list = []
130+        klass = self.FieldStorageClass or self.__class__
131+        part = klass(self.fp, {}, ib,
132+                     environ, keep_blank_values, strict_parsing)
133+        # Throw first part away
134+        while not part.done:
135+            headers = Message(self.fp)  # the Message subclass from this module, with size-limited readline
136+            part = klass(self.fp, headers, ib,
137+                         environ, keep_blank_values, strict_parsing)
138+            self.list.append(part)
139+        self.skip_lines()
140+
141+    def read_lines_to_eof(self):
142+        """Internal: read lines until EOF."""
143+        while 1:
144+            line = self.fp.readline(64000) # chunked read
145+            if not line:
146+                self.done = -1  # EOF
147+                break
148+            self.__write(line)  # mangled to _FieldStorage__write; presumably hits the base class's private method (same class name)
149+
150+    def read_lines_to_outerboundary(self):
151+        """Internal: read lines until outerboundary."""
152+        next = "--" + self.outerboundary
153+        last = next + "--"
154+        delim = ""
155+        while 1:
156+            line = self.fp.readline(64000) # chunked read
157+            if not line:
158+                self.done = -1  # EOF before any boundary line was seen
159+                break
160+            if line[:2] == "--":  # only lines starting with "--" are compared against the boundary
161+                strippedline = line.strip()
162+                if strippedline == next:
163+                    break
164+                if strippedline == last:
165+                    self.done = 1  # closing boundary
166+                    break
167+            odelim = delim  # a line ending is written with the NEXT chunk, so the one before the boundary is dropped
168+            if line[-2:] == "\r\n":
169+                delim = "\r\n"
170+                line = line[:-2]
171+            elif line[-1] == "\n":
172+                delim = "\n"
173+                line = line[:-1]
174+            else:
175+                delim = ""  # chunk ended mid-line (size limit hit or no trailing newline)
176+            self.__write(odelim + line)
177+
178+
179+
180+def parse_file_upload(post_stream, environ):
181     "Returns a tuple of (POST MultiValueDict, FILES MultiValueDict)"
182-    import email, email.Message
183-    from cgi import parse_header
184-    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
185-    raw_message += '\r\n\r\n' + post_data
186-    msg = email.message_from_string(raw_message)
187+    fs = FieldStorage(post_stream, environ=environ)  # this module's FieldStorage subclass, fed the request stream
188     POST = MultiValueDict()
189     FILES = MultiValueDict()
190-    for submessage in msg.get_payload():
191-        if isinstance(submessage, email.Message.Message):
192-            name_dict = parse_header(submessage['Content-Disposition'])[1]
193-            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
194-            # or {'name': 'blah'} for POST fields
195-            # We assume all uploaded files have a 'filename' set.
196-            if name_dict.has_key('filename'):
197-                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
198-                if not name_dict['filename'].strip():
199+    for key in fs.keys():
200+        # We can't use FieldStorage.getlist to get contents of a
201+        # field as a list because for file fields it returns only filenames
202+        if isinstance(fs[key], list):  # a repeated field name yields a list of parts
203+            field_list = fs[key]
204+        else:
205+            field_list = [fs[key]]
206+        for field in field_list:
207+            if hasattr(field, 'filename') and field.filename is not None:  # file upload part
208+                if not field.filename.strip():
209                     continue
210                 # IE submits the full path, so trim everything but the basename.
211                 # (We can't use os.path.basename because it expects Linux paths.)
212-                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
213-                FILES.appendlist(name_dict['name'], {
214+                filename = field.filename[field.filename.rfind("\\") + 1:]
215+                FILES.appendlist(key, FileDict({
216                     'filename': filename,
217-                    'content-type': (submessage.has_key('Content-Type') and submessage['Content-Type'] or None),
218-                    'content': submessage.get_payload(),
219-                })
220+                    'content-type': field.type,
221+                    'file': field.file,
222+                    'tmp_name': getattr(field, 'tmp_name', None),  # only set by make_file(); None when it never ran
223+                }))
224             else:
225-                POST.appendlist(name_dict['name'], submessage.get_payload())
226+                POST.appendlist(key, field.value)  # ordinary (non-file) form field
227     return POST, FILES
228 
229 class QueryDict(MultiValueDict):
230Index: django/db/models/base.py
231===================================================================
232--- django/db/models/base.py    (revision 3013)
233+++ django/db/models/base.py    (working copy)
234@@ -300,7 +300,7 @@
235     def _get_FIELD_size(self, field):
236         return os.path.getsize(self._get_FIELD_filename(field))
237 
238-    def _save_FIELD_file(self, field, filename, raw_contents):
239+    def _save_FIELD_file(self, field, filename, temp_file):
240         directory = field.get_directory_name()
241         try: # Create the date-based directory if it doesn't exist.
242             os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
243@@ -322,10 +322,13 @@
244         setattr(self, field.attname, filename)
245 
246         full_filename = self._get_FIELD_filename(field)
247-        fp = open(full_filename, 'wb')
248-        fp.write(raw_contents)
249-        fp.close()
250+        #fp = open(full_filename, 'wb')
251+        #fp.write(raw_contents)
252+        #fp.close()
253 
254+        # move file
255+        os.rename(temp_file, full_filename)
256+
257         # Save the width and/or height, if applicable.
258         if isinstance(field, ImageField) and (field.width_field or field.height_field):
259             from django.utils.images import get_image_dimensions
260Index: django/db/models/fields/__init__.py
261===================================================================
262--- django/db/models/fields/__init__.py (revision 3013)
263+++ django/db/models/fields/__init__.py (working copy)
264@@ -594,9 +594,9 @@
265         if new_data.get(upload_field_name, False):
266             func = getattr(new_object, 'save_%s_file' % self.name)
267             if rel:
268-                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"])
269+                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["tmp_name"])
270             else:
271-                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"])
272+                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["tmp_name"])
273 
274     def get_directory_name(self):
275         return os.path.normpath(datetime.datetime.now().strftime(self.upload_to))
276Index: django/forms/__init__.py
277===================================================================
278--- django/forms/__init__.py    (revision 3013)
279+++ django/forms/__init__.py    (working copy)
280@@ -641,7 +641,7 @@
281         self.validator_list = [self.isNonEmptyFile] + validator_list
282 
283     def isNonEmptyFile(self, field_data, all_data):
284-        if not field_data['content']:
285+        if field_data.get_size()<1:  # presumably field_data is a FileDict: size via seek/tell, without loading 'content'
286             raise validators.CriticalValidationError, gettext("The submitted file is empty.")
287 
288     def render(self, data):
289Index: django/core/handlers/wsgi.py
290===================================================================
291--- django/core/handlers/wsgi.py        (revision 3013)
292+++ django/core/handlers/wsgi.py        (working copy)
293@@ -69,9 +69,7 @@
294         # Populates self._post and self._files
295         if self.environ['REQUEST_METHOD'] == 'POST':
296             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
297-                header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
298-                header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
299-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
300+                self._post, self._files = http.parse_file_upload(self.environ['wsgi.input'], self.environ)
301             else:
302                 self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
303         else:
304Index: django/core/handlers/modpython.py
305===================================================================
306--- django/core/handlers/modpython.py   (revision 3013)
307+++ django/core/handlers/modpython.py   (working copy)
308@@ -26,7 +26,10 @@
309     def _load_post_and_files(self):
310         "Populates self._post and self._files"
311         if self._req.headers_in.has_key('content-type') and self._req.headers_in['content-type'].startswith('multipart'):
312-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
313+            environ = dict(self.META)  # copy, so the CGI-style keys added below do not modify META
314+            environ['CONTENT_LENGTH'] = environ['HTTP_CONTENT_LENGTH']  # NOTE(review): KeyError if META lacks this key; confirm it is always set
315+            environ['CONTENT_TYPE'] = environ['HTTP_CONTENT_TYPE']  # NOTE(review): same KeyError risk as the line above
316+            self._post, self._files = http.parse_file_upload(self._req, environ)  # the request object itself is passed as the stream
317         else:
318             self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
319