Code

Ticket #4165: 5099-streaming_file_upload_with_safe_file_move_progress.diff

File 5099-streaming_file_upload_with_safe_file_move_progress.diff, 15.6 KB (added by Øyvind Saltvik <oyvind@…>, 7 years ago)

Start of new improved version

Line 
1Index: django/http/__init__.py
2===================================================================
3--- django/http/__init__.py     (revision 5099)
4+++ django/http/__init__.py     (working copy)
5@@ -1,9 +1,14 @@
6-import os
7+import os, pickle
8 from Cookie import SimpleCookie
9 from pprint import pformat
10 from urllib import urlencode, quote
11 from django.utils.datastructures import MultiValueDict
12 
13+try:
14+    from cStringIO import StringIO
15+except ImportError:
16+    from StringIO import StringIO
17+
18 RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
19 
20 try:
21@@ -306,3 +311,4 @@
22     if not host:
23         host = request.META.get('HTTP_HOST', '')
24     return host
25+
26Index: django/oldforms/__init__.py
27===================================================================
28--- django/oldforms/__init__.py (revision 5099)
29+++ django/oldforms/__init__.py (working copy)
30@@ -666,17 +666,22 @@
31         self.validator_list = [self.isNonEmptyFile] + validator_list
32 
33     def isNonEmptyFile(self, field_data, all_data):
34-        try:
35-            content = field_data['content']
36-        except TypeError:
37+        if field_data.has_key('_file_upload_error'):
38+            raise validators.CriticalValidationError, field_data['_file_upload_error']
39+        if not field_data.has_key('filename'):
40             raise validators.CriticalValidationError, gettext("No file was submitted. Check the encoding type on the form.")
41-        if not content:
42+        if not field_data['content-length']:
43             raise validators.CriticalValidationError, gettext("The submitted file is empty.")
44 
45     def render(self, data):
46         return '<input type="file" id="%s" class="v%s" name="%s" />' % \
47             (self.get_id(), self.__class__.__name__, self.field_name)
48 
49+    def prepare(self, new_data):
50+        if new_data.has_key('_file_upload_error'):
51+            # pretend we got something in the field to raise a validation error later
52+            new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] }
53+
54     def html2python(data):
55         if data is None:
56             raise EmptyValue
57Index: django/db/models/base.py
58===================================================================
59--- django/db/models/base.py    (revision 5099)
60+++ django/db/models/base.py    (working copy)
61@@ -12,12 +12,14 @@
62 from django.dispatch import dispatcher
63 from django.utils.datastructures import SortedDict
64 from django.utils.functional import curry
65+from django.utils.file import file_move_safe
66 from django.conf import settings
67 from itertools import izip
68 import types
69 import sys
70 import os
71 
72+               
73 class ModelBase(type):
74     "Metaclass for all models"
75     def __new__(cls, name, bases, attrs):
76@@ -361,7 +363,7 @@
77     def _get_FIELD_size(self, field):
78         return os.path.getsize(self._get_FIELD_filename(field))
79 
80-    def _save_FIELD_file(self, field, filename, raw_contents, save=True):
81+    def _save_FIELD_file(self, field, filename, raw_field, save=True):
82         directory = field.get_directory_name()
83         try: # Create the date-based directory if it doesn't exist.
84             os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
85@@ -383,9 +385,13 @@
86         setattr(self, field.attname, filename)
87 
88         full_filename = self._get_FIELD_filename(field)
89-        fp = open(full_filename, 'wb')
90-        fp.write(raw_contents)
91-        fp.close()
92+        if raw_field.has_key('tmpfilename'):
93+            raw_field['tmpfile'].close()
94+            file_move_safe(raw_field['tmpfilename'], full_filename)
95+        else:
96+            fp = open(full_filename, 'wb')
97+            fp.write(raw_field['content'])
98+            fp.close()
99 
100         # Save the width and/or height, if applicable.
101         if isinstance(field, ImageField) and (field.width_field or field.height_field):
102Index: django/db/models/fields/__init__.py
103===================================================================
104--- django/db/models/fields/__init__.py (revision 5099)
105+++ django/db/models/fields/__init__.py (working copy)
106@@ -636,7 +636,7 @@
107         setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
108         setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
109         setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
110-        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
111+        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
112         dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
113 
114     def delete_file(self, instance):
115@@ -659,9 +659,9 @@
116         if new_data.get(upload_field_name, False):
117             func = getattr(new_object, 'save_%s_file' % self.name)
118             if rel:
119-                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
120+                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0], save)
121             else:
122-                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
123+                func(new_data[upload_field_name]["filename"], new_data[upload_field_name], save)
124 
125     def get_directory_name(self):
126         return os.path.normpath(datetime.datetime.now().strftime(self.upload_to))
127Index: django/conf/global_settings.py
128===================================================================
129--- django/conf/global_settings.py      (revision 5099)
130+++ django/conf/global_settings.py      (working copy)
131@@ -240,6 +240,20 @@
132 # isExistingURL validator.
133 URL_VALIDATOR_USER_AGENT = "Django/0.96pre (http://www.djangoproject.com)"
134 
135+# The directory to place streamed file uploads. The web server needs write
136+# permissions on this directory.
137+# If this is None, streaming uploads are disabled.
138+FILE_UPLOAD_DIR = None
139+
140+
141+# The minimum size (in bytes) of a POST before file uploads are streamed
142+# to disk. POSTs smaller than this are kept in memory.
143+# Note that FILE_UPLOAD_DIR must also be set for streaming to take place.
144+STREAMING_MIN_POST_SIZE = 512 * (2**10)
145+
146+
147+
148+
149 ##############
150 # MIDDLEWARE #
151 ##############
152@@ -335,3 +349,5 @@
153 
154 # The list of directories to search for fixtures
155 FIXTURE_DIRS = ()
156+
157+
158Index: django/core/handlers/wsgi.py
159===================================================================
160--- django/core/handlers/wsgi.py        (revision 5099)
161+++ django/core/handlers/wsgi.py        (working copy)
162@@ -111,7 +111,13 @@
163             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
164                 header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
165                 header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
166-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
167+                header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '')
168+                try:
169+                    self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input'], self.META.get('QUERY_STRING'))
170+                except:
171+                    self._post, self._files = {}, {} # make sure we dont read the input stream again
172+                    raise
173+                self._raw_post_data = None # raw data is not available for streamed multipart messages
174             else:
175                 self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
176         else:
177Index: django/core/handlers/modpython.py
178===================================================================
179--- django/core/handlers/modpython.py   (revision 5099)
180+++ django/core/handlers/modpython.py   (working copy)
181@@ -47,7 +47,12 @@
182     def _load_post_and_files(self):
183         "Populates self._post and self._files"
184         if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
185-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
186+            self._raw_post_data = None # raw data is not available for streamed multipart messages
187+            try:
188+                self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req, self.META.get('QUERY_STRING'))
189+            except:
190+                self._post, self._files = {}, {} # make sure we dont read the input stream again
191+                raise
192         else:
193             self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict()
194 
195Index: django/utils/file.py
196===================================================================
197--- django/utils/file.py        (revision 0)
198+++ django/utils/file.py        (revision 0)
199@@ -0,0 +1,36 @@
200+import os
201+
202+try:
203+    import shutil
204+    file_move = shutil.move
205+except ImportError:
206+    file_move = os.rename
207+
208+def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64):
209+    """
210+    Moves a file from one location to another in the safest way possible.
211+
212+    First, it tries shutil.move (or os.rename when shutil is unavailable;
213+    os.rename breaks if it encounters a change in filesystems). If that
214+    fails, it streams the file manually from one file to the other in
215+    chunks of chunk_size bytes and then removes the original.
216+    """
217+    try:
218+        file_move(old_file_name, new_file_name)
219+        return
220+    except EnvironmentError:
221+        pass # could not move directly; fall back to the manual copy below
222+
223+    old_file = open(old_file_name, 'rb') # open the source first so a missing source cannot truncate the target
224+    try:
225+        new_file = open(new_file_name, 'wb')
226+        try:
227+            current_chunk = old_file.read(chunk_size)
228+            while current_chunk:
229+                new_file.write(current_chunk)
230+                current_chunk = old_file.read(chunk_size)
231+        finally:
232+            new_file.close()
233+    finally:
234+        old_file.close()
235+    os.remove(old_file_name)
236Index: tests/modeltests/test_client/views.py
237===================================================================
238--- tests/modeltests/test_client/views.py       (revision 5099)
239+++ tests/modeltests/test_client/views.py       (working copy)
240@@ -44,6 +44,12 @@
241 
242     return HttpResponse(t.render(c))
243 
244+def post_file_view(request):
245+    "A view that expects a multipart post and returns a file in the context"
246+    t = Template('File {{ file.filename }} received', name='POST Template')
247+    c = Context({'file': request.FILES['file_file']})
248+    return HttpResponse(t.render(c))
249+
250 def redirect_view(request):
251     "A view that redirects all requests to the GET view"
252     return HttpResponseRedirect('/test_client/get_view/')
253Index: tests/modeltests/test_client/models.py
254===================================================================
255--- tests/modeltests/test_client/models.py      (revision 5099)
256+++ tests/modeltests/test_client/models.py      (working copy)
257@@ -75,6 +75,21 @@
258         self.assertEqual(response.template.name, "Book template")
259         self.assertEqual(response.content, "Blink - Malcolm Gladwell")
260 
261+    def test_post_file_view(self):
262+        "POST this python file to a view"
263+        import os, tempfile
264+        from django.conf import settings
265+        file = __file__.replace('.pyc', '.py')
266+        for upload_dir in [None, tempfile.gettempdir()]:
267+            settings.FILE_UPLOAD_DIR = upload_dir
268+            post_data = { 'name': file, 'file': open(file) }
269+            response = self.client.post('/test_client/post_file_view/', post_data)
270+            self.failUnless('models.py' in response.context['file']['filename'])
271+            self.failUnless(len(response.context['file']['content']) == os.path.getsize(file))
272+            if upload_dir:
273+                self.failUnless(response.context['file']['tmpfilename'])
274+
275+
276     def test_redirect(self):
277         "GET a URL that redirects elsewhere"
278         response = self.client.get('/test_client/redirect_view/')
279Index: tests/modeltests/test_client/urls.py
280===================================================================
281--- tests/modeltests/test_client/urls.py        (revision 5099)
282+++ tests/modeltests/test_client/urls.py        (working copy)
283@@ -4,6 +4,7 @@
284 urlpatterns = patterns('',
285     (r'^get_view/$', views.get_view),
286     (r'^post_view/$', views.post_view),
287+    (r'^post_file_view/$', views.post_file_view),
288     (r'^raw_post_view/$', views.raw_post_view),
289     (r'^redirect_view/$', views.redirect_view),
290     (r'^form_view/$', views.form_view),
291Index: docs/request_response.txt
292===================================================================
293--- docs/request_response.txt   (revision 5099)
294+++ docs/request_response.txt   (working copy)
295@@ -72,13 +72,25 @@
296 ``FILES``
297     A dictionary-like object containing all uploaded files. Each key in
298     ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
299-    value in ``FILES`` is a standard Python dictionary with the following three
300+    value in ``FILES`` is a standard Python dictionary with the following four
301     keys:
302 
303         * ``filename`` -- The name of the uploaded file, as a Python string.
304         * ``content-type`` -- The content type of the uploaded file.
305         * ``content`` -- The raw content of the uploaded file.
306+        * ``content-length`` -- The length of the content in bytes.
307 
308+    If streaming file uploads are enabled, two additional keys
309+    describing the uploaded file will be present:
310+
311+       * ``tmpfilename`` -- The filename for the temporary file.
312+       * ``tmpfile`` -- An open file object for the temporary file.
313+
314+    The temporary file will be removed when the request finishes.
315+
316+    Note that accessing ``content`` when streaming uploads are enabled
317+    will read the whole file into memory, which may not be what you want.
318+
319     Note that ``FILES`` will only contain data if the request method was POST
320     and the ``<form>`` that posted to the request had
321     ``enctype="multipart/form-data"``. Otherwise, ``FILES`` will be a blank
322Index: docs/settings.txt
323===================================================================
324--- docs/settings.txt   (revision 5099)
325+++ docs/settings.txt   (working copy)
326@@ -437,6 +437,15 @@
327 
328 .. _Testing Django Applications: ../testing/
329 
330+FILE_UPLOAD_DIR
331+---------------
332+
333+Default: ``None``
334+
335+Path to a directory where temporary files should be written during
336+file uploads. Leaving this as ``None`` will disable streaming file uploads,
337+and cause all uploaded files to be stored (temporarily) in memory.
338+
339 IGNORABLE_404_ENDS
340 ------------------
341 
342@@ -780,6 +789,16 @@
343 
344 .. _site framework docs: ../sites/
345 
346+STREAMING_MIN_POST_SIZE
347+-----------------------
348+
349+Default: 524288 (``512*1024``)
350+
351+An integer specifying the minimum number of bytes that has to be
352+received (in a POST) for file upload streaming to take place. Any
353+request smaller than this will be handled in memory.
354+Note: ``FILE_UPLOAD_DIR`` has to be defined to enable streaming.
355+
356 TEMPLATE_CONTEXT_PROCESSORS
357 ---------------------------
358 
359Index: docs/forms.txt
360===================================================================
361--- docs/forms.txt      (revision 5099)
362+++ docs/forms.txt      (working copy)
363@@ -475,6 +475,19 @@
364    new_data = request.POST.copy()
365    new_data.update(request.FILES)
366 
367+Streaming file uploads
368+----------------------
369+
370+File uploads will be read into memory by default. This works fine for
371+small- to medium-sized uploads (from 1MB to 100MB, depending on your
372+setup and usage). If you want to support larger uploads, you can enable
373+upload streaming, where only a small part of the file is in memory
374+at any time. To do this, you need to specify the ``FILE_UPLOAD_DIR``
375+setting (see the settings_ document for more details).
376+
377+See `request object`_ for more details about ``request.FILES`` objects
378+with streaming file uploads enabled.
379+
380 Validators
381 ==========
382 
383@@ -693,3 +706,4 @@
384 .. _`generic views`: ../generic_views/
385 .. _`models API`: ../model-api/
386 .. _settings: ../settings/
387+.. _request object: ../request_response/#httprequest-objects