Django

Code

root/django/trunk/django/utils/encoding.py

Revision 8588, 4.5 kB (checked in by mtredinnick, 3 months ago)

Fixed #6353 (again) by making force_unicode() and smart_str() a bit more robust
in the face of funky Exception instances. This is slightly symptomatic of
problems in the calling code, but we don't want to raise a secondary exception
whilst trying to display the first one. Based on a patch from Karen Tracey.

  • Property svn:eol-style set to native
Line 
1 import types
2 import urllib
3 import datetime
4 from django.utils.functional import Promise
5
class DjangoUnicodeDecodeError(UnicodeDecodeError):
    """
    A UnicodeDecodeError that also carries the object that failed to decode.

    The offending object is kept on the 'obj' attribute and is appended to the
    standard error message, together with its type.
    """
    def __init__(self, obj, *args):
        # Remember what was being decoded; the remaining positional arguments
        # are handed to UnicodeDecodeError unchanged.
        self.obj = obj
        UnicodeDecodeError.__init__(self, *args)

    def __str__(self):
        # Start from the stock UnicodeDecodeError message and extend it with
        # the repr() and type of the object the caller passed in.
        base_message = UnicodeDecodeError.__str__(self)
        details = (base_message, self.obj, type(self.obj))
        return '%s. You passed in %r (%s)' % details
15
class StrAndUnicode(object):
    """
    Mix-in that derives __str__ from __unicode__.

    Classes using it only need to define __unicode__; __str__ then returns
    that text encoded as a UTF-8 bytestring.
    """
    def __str__(self):
        text = self.__unicode__()
        return text.encode('utf-8')
24
def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Returns a unicode object representing 's', decoding bytestrings with the
    'encoding' codec.

    Promise instances (the result of a gettext_lazy() call) are returned
    untouched; everything else is delegated to force_unicode().

    If strings_only is True, don't convert (some) non-string-like objects.
    """
    if not isinstance(s, Promise):
        return force_unicode(s, encoding, strings_only, errors)
    # Lazy translation object: hand it back as-is so it stays lazy.
    return s
36
37 def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
38     """
39     Similar to smart_unicode, except that lazy instances are resolved to
40     strings, rather than kept as lazy objects.
41
42     If strings_only is True, don't convert (some) non-string-like objects.
43     """
44     if strings_only and isinstance(s, (types.NoneType, int, long, datetime.datetime, datetime.date, datetime.time, float)):
45         return s
46     try:
47         if not isinstance(s, basestring,):
48             if hasattr(s, '__unicode__'):
49                 s = unicode(s)
50             else:
51                 try:
52                     s = unicode(str(s), encoding, errors)
53                 except UnicodeEncodeError:
54                     if not isinstance(s, Exception):
55                         raise
56                     # If we get to here, the caller has passed in an Exception
57                     # subclass populated with non-ASCII data without special
58                     # handling to display as a string. We need to handle this
59                     # without raising a further exception. We do an
60                     # approximation to what the Exception's standard str()
61                     # output should be.
62                     s = ' '.join([force_unicode(arg, encoding, strings_only,
63                             errors) for arg in s])
64         elif not isinstance(s, unicode):
65             # Note: We use .decode() here, instead of unicode(s, encoding,
66             # errors), so that if s is a SafeString, it ends up being a
67             # SafeUnicode at the end.
68             s = s.decode(encoding, errors)
69     except UnicodeDecodeError, e:
70         raise DjangoUnicodeDecodeError(s, *e.args)
71     return s
72
def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Returns a bytestring version of 's', encoded as specified in 'encoding'.

    Bytestring input is treated as UTF-8 and is only re-encoded when a
    different target 'encoding' is requested.

    If strings_only is True, don't convert (some) non-string-like objects:
    None and ints are returned unchanged.
    """
    if strings_only and isinstance(s, (types.NoneType, int)):
        return s
    if isinstance(s, Promise):
        # Resolve the lazy object to unicode first, then encode it.
        return unicode(s).encode(encoding, errors)
    if isinstance(s, unicode):
        return s.encode(encoding, errors)
    if isinstance(s, basestring):
        # A bytestring: transcode from UTF-8 only if it is non-empty and the
        # requested encoding differs.
        if s and encoding != 'utf-8':
            return s.decode('utf-8', errors).encode(encoding, errors)
        return s
    # Anything that is not string-like.
    try:
        return str(s)
    except UnicodeEncodeError:
        if not isinstance(s, Exception):
            return unicode(s).encode(encoding, errors)
        # An Exception subclass containing non-ASCII data that doesn't know
        # how to print itself properly. Convert its arguments one by one
        # instead of raising a further exception.
        parts = [smart_str(arg, encoding, strings_only, errors)
                for arg in s]
        return ' '.join(parts)
100
def iri_to_uri(iri):
    """
    Convert an Internationalized Resource Identifier (IRI) portion to a URI
    portion that is suitable for inclusion in a URL.

    This is the algorithm from section 3.1 of RFC 3987.  However, since we are
    assuming input is either UTF-8 or unicode already, we can simplify things a
    little from the full method.

    Returns an ASCII string containing the encoded result. If 'iri' is None,
    None is returned unchanged.
    """
    # The list of safe characters here is constructed from the printable ASCII
    # characters that are not explicitly excluded by the list at the end of
    # section 3.1 of RFC 3987. "@", "'" and "~" belong to that set as well and
    # urllib.quote() does not consider them safe by default, so they must be
    # listed explicitly or they would be percent-encoded needlessly.
    if iri is None:
        return iri
    return urllib.quote(smart_str(iri), safe="/#%[]=:;$&()+,!?*@'~")
Note: See TracBrowser for help on using the browser.