Ticket #36777: tags.py

File tags.py, 3.1 KB (added by Caram, 4 hours ago)
Line 
1import os
2from urllib.parse import quote
3
4from django import template
5from django.urls import reverse
6from django.utils.html import format_html
7from django.utils.safestring import mark_safe
8
9register = template.Library()
10
11@register.filter
def urlencode_path(url):
    """
    Percent-encode the path component of a URL for use in href attributes.

    The path is split on ``/`` and every segment is decoded and then
    re-encoded. Characters that were left raw (apostrophes, spaces,
    non-ASCII text) become percent-escapes, while escapes that are already
    present are not doubled. Because the work is done per segment, an
    escaped slash (``%2F``) inside a segment stays escaped instead of being
    turned into a path separator. Scheme, host, query string and fragment
    are passed through unchanged.

    Args:
        url: A URL or URL path as a string. Falsy values (``None``, ``''``)
            yield ``''``.

    Returns:
        The URL rebuilt with a percent-encoded path.

    Usage: {{ attachment.file.url|urlencode_path }}
    """
    if not url:
        return ''

    from urllib.parse import unquote, urlsplit, urlunsplit

    parts = urlsplit(url)

    # Decode-then-encode each segment on its own. Doing this on the whole
    # path with '/' marked safe would silently convert "%2F" into a real
    # separator and change which resource the URL points at.
    encoded_path = '/'.join(
        quote(unquote(segment), safe='')
        for segment in parts.path.split('/')
    )

    return urlunsplit(
        (parts.scheme, parts.netloc, encoded_path, parts.query, parts.fragment)
    )
41
42
43@register.filter
def filesize(file_path):
    """
    Return the size in bytes of the file at *file_path*, or 0 if unavailable.

    The result is suitable for piping to ``filesizeformat``. All filesystem
    calls use UTF-8 encoded byte paths, so the lookup does not depend on the
    process's default filesystem encoding.

    For ``str`` input the path is looked up exactly as given first. Only if
    that fails is a "mojibake-repaired" spelling tried (UTF-8 bytes that
    were wrongly decoded as Latin-1). Trying the literal path first keeps a
    correctly decoded name that merely looks like mojibake from being
    rewritten and then reported as missing.

    Args:
        file_path: Filesystem path as ``str`` or ``bytes``. Falsy values and
            any other type yield 0.

    Returns:
        The file size in bytes, or 0 when the path is empty, of an
        unsupported type, cannot be encoded, or cannot be stat'ed.

    Usage: {{ attachment.file.path|filesize|filesizeformat }}
    """
    import logging

    try:
        if not file_path:
            return 0

        for candidate in _filesize_candidate_paths(file_path):
            try:
                # Ask for the size directly rather than exists()+getsize():
                # one stat call and no window for the file to vanish between.
                return os.path.getsize(candidate)
            except (OSError, ValueError):
                # Missing or inaccessible path, or an embedded NUL byte:
                # deliberately best-effort, move on to the next spelling.
                continue
    except (TypeError, UnicodeError, AttributeError) as exc:
        logging.getLogger(__name__).warning(
            "filesize filter error for path %r: %s", file_path, exc
        )

    return 0


def _filesize_candidate_paths(file_path):
    """Return the byte paths to probe for *file_path*, most literal first."""
    if isinstance(file_path, bytes):
        # Already bytes: use as-is.
        return [file_path]
    if not isinstance(file_path, str):
        return []

    # 'surrogateescape' round-trips names that came from the OS with
    # undecodable bytes; for ordinary text it is identical to plain UTF-8.
    candidates = [file_path.encode('utf-8', 'surrogateescape')]

    try:
        # Undo UTF-8-decoded-as-Latin-1 mojibake. Latin-1 maps code points
        # 0-255 straight back to the original byte values.
        repaired = file_path.encode('latin-1').decode('utf-8')
    except UnicodeError:
        # Not Latin-1 encodable, or not valid UTF-8 afterwards: no repair.
        return candidates

    repaired_bytes = repaired.encode('utf-8', 'surrogateescape')
    if repaired_bytes != candidates[0]:
        candidates.append(repaired_bytes)
    return candidates
Back to Top