Django

Code

root/django/branches/gis/django/utils/text.py

Revision 7836, 8.5 kB (checked in by jbronn, 5 months ago)

gis: Merged revisions 7772-7808,7811-7814,7816-7823,7826-7829,7831-7833,7835 via svnmerge from trunk. Modified GeoWhereNode accordingly for changes in r7835.

  • Property svn:eol-style set to native
  • Property svn:keywords set to LastChangedRevision
Line 
1 import re
2 from django.conf import settings
3 from django.utils.encoding import force_unicode
4 from django.utils.functional import allow_lazy
5 from django.utils.translation import ugettext_lazy
6 from htmlentitydefs import name2codepoint
7
def capfirst(x):
    """
    Capitalizes the first letter of a string.

    Falsy input (for example an empty string or ``None``) is returned
    unchanged; anything else is converted with ``force_unicode`` and its
    first character is upper-cased.
    """
    if not x:
        return x
    text = force_unicode(x)
    return text[0].upper() + text[1:]
capfirst = allow_lazy(capfirst, unicode)
11
def wrap(text, width):
    """
    Word-wraps ``text`` at ``width`` columns.

    Existing line breaks and most spaces are kept. Line breaks already in the
    text are expected to be posix newlines. Only the spaces between words are
    ever turned into newlines; the words themselves are never split.
    """
    text = force_unicode(text)
    chunks = text.split(' ')
    first = chunks[0]
    pieces = [first]
    # Column reached on the current output line after the first chunk
    # (measured from its last embedded newline, if it has one).
    column = len(first) - first.rfind('\n') - 1
    for chunk in chunks[1:]:
        segments = chunk.split('\n')
        # Only the part before the chunk's first newline extends the
        # current line; the +1 accounts for the separating space.
        column += len(segments[0]) + 1
        if column > width:
            # Too long: break here instead of emitting the space.
            pieces.append('\n')
            column = len(segments[-1])
        else:
            pieces.append(' ')
            if len(segments) > 1:
                # The chunk carries its own newline, so the column restarts
                # after it.
                column = len(segments[-1])
        pieces.append(chunk)
    return u''.join(pieces)
wrap = allow_lazy(wrap, unicode)
39
def truncate_words(s, num):
    """
    Truncates a string after a certain number of words.

    ``s`` is converted with ``force_unicode`` and split on whitespace;
    ``num`` is converted with ``int``. When the text has more than ``num``
    words, the first ``num`` are kept and '...' is appended (unless the last
    kept word already ends with '...'). The words are always re-joined with
    single spaces. A ``num`` of zero or less yields an empty string.
    """
    s = force_unicode(s)
    length = int(num)
    if length <= 0:
        # Nothing can be kept; without this guard words[-1] below would be
        # evaluated on an empty list and raise IndexError.
        return u''
    words = s.split()
    if len(words) > length:
        words = words[:length]
        if not words[-1].endswith('...'):
            words.append('...')
    return u' '.join(words)
truncate_words = allow_lazy(truncate_words, unicode)
51
def truncate_html_words(s, num):
    """
    Truncates html to a certain number of words (not counting tags and
    comments). Closes opened tags if they were correctly closed in the given
    html.

    Returns an empty string when ``num`` is zero or negative, and the
    (unicode-converted) input unchanged when it holds no more than ``num``
    words.
    """
    s = force_unicode(s)
    limit = int(num)
    if limit <= 0:
        return u''
    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
    # Matches an entity, a tag/comment, or (captured) a real word.
    word_or_markup = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
    # Captures: closing slash, tag name, self-closing slash.
    tag_pattern = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
    cursor = 0
    cut_at = 0
    word_count = 0
    open_tags = []
    # Scan one word past the limit so we know whether truncation is needed.
    while word_count <= limit:
        found = word_or_markup.search(s, cursor)
        if found is None:
            # Checked through whole string
            break
        cursor = found.end(0)
        if found.group(1):
            # It's an actual non-HTML word
            word_count += 1
            if word_count == limit:
                cut_at = cursor
            continue
        if cut_at:
            # Tags after the truncation point are irrelevant
            continue
        tag = tag_pattern.match(found.group(0))
        if tag is None:
            # Entities and other non-tags need no bookkeeping
            continue
        closing, tagname, self_closing = tag.groups()
        tagname = tagname.lower()  # Element names are always case-insensitive
        if self_closing or tagname in html4_singlets:
            continue
        if not closing:
            # Most recently opened tag goes to the front of the list
            open_tags.insert(0, tagname)
        elif tagname in open_tags:
            # SGML: An end tag closes, back to the matching start tag, all
            # unclosed intervening start tags with omitted end tags
            open_tags = open_tags[open_tags.index(tagname) + 1:]
    if word_count <= limit:
        # Don't try to close tags if we don't need to truncate
        return s
    # Close any tags still open, innermost first
    closers = ''.join(['</%s>' % name for name in open_tags])
    return s[:cut_at] + ' ...' + closers
truncate_html_words = allow_lazy(truncate_html_words, unicode)
114
def get_valid_filename(s):
    """
    Returns the given string converted to a string that can be used for a clean
    filename. Specifically, leading and trailing spaces are removed; other
    spaces are converted to underscores; and all non-filename-safe characters
    (anything other than ASCII letters, digits, '-', '_' and '.') are removed.
    >>> get_valid_filename("john's portrait in 2004.jpg")
    u'johns_portrait_in_2004.jpg'
    """
    underscored = force_unicode(s).strip().replace(' ', '_')
    unsafe_chars = re.compile(r'[^-A-Za-z0-9_.]')
    return unsafe_chars.sub('', underscored)
get_valid_filename = allow_lazy(get_valid_filename, unicode)
127
def get_text_list(list_, last_word=ugettext_lazy(u'or')):
    """
    Joins the items of ``list_`` into one readable string: items are
    separated by ', ' except the final pair, which is joined by ``last_word``.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    u'a, b, c or d'
    >>> get_text_list(['a', 'b', 'c'], 'and')
    u'a, b and c'
    >>> get_text_list(['a', 'b'], 'and')
    u'a and b'
    >>> get_text_list(['a'])
    u'a'
    >>> get_text_list([])
    u''
    """
    count = len(list_)
    if count == 0:
        return u''
    if count == 1:
        return force_unicode(list_[0])
    # Everything except the final item is comma-separated.
    leading = [force_unicode(item) for item in list_][:-1]
    return u'%s %s %s' % (
        ', '.join(leading), force_unicode(last_word), force_unicode(list_[-1]))
get_text_list = allow_lazy(get_text_list, unicode)
145
def normalize_newlines(text):
    "Converts every '\\r\\n' and lone '\\r' line ending in text to '\\n'."
    unified = re.sub(r'\r\n|\r|\n', '\n', text)
    return force_unicode(unified)
normalize_newlines = allow_lazy(normalize_newlines, unicode)
149
def recapitalize(text):
    """
    Recapitalizes text, placing caps after end-of-sentence punctuation.

    The text is lower-cased first; then the first character of the text and
    any a-z letter that follows '. ', '? ' or '! ' is upper-cased.
    """
    lowered = force_unicode(text).lower()
    sentence_start = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])')

    def _upper(match):
        return match.group(1).upper()

    return sentence_start.sub(_upper, lowered)
recapitalize = allow_lazy(recapitalize)
157
def phone2numeric(phone):
    """
    Converts a phone number with letters into its numeric equivalent.

    Every ASCII letter (either case) is replaced by the digit it shares a key
    with on a standard telephone keypad, including Q (7) and Z (9). All other
    characters are left untouched.
    """
    keypad = {
        'a': '2', 'b': '2', 'c': '2',
        'd': '3', 'e': '3', 'f': '3',
        'g': '4', 'h': '4', 'i': '4',
        'j': '5', 'k': '5', 'l': '5',
        'm': '6', 'n': '6', 'o': '6',
        'p': '7', 'q': '7', 'r': '7', 's': '7',
        't': '8', 'u': '8', 'v': '8',
        'w': '9', 'x': '9', 'y': '9', 'z': '9',
    }
    letters = re.compile(r'[A-Z]', re.I)

    def char2number(match):
        char = match.group(0)
        # Case-insensitive matching can admit characters that do not
        # lower-case to a-z; leave those as they are rather than drop them.
        return keypad.get(char.lower(), char)

    return letters.sub(char2number, phone)
167 phone2numeric = allow_lazy(phone2numeric)
168
# From http://www.xhaus.com/alan/python/httpcomp.html#gzip
# Used with permission.
def compress_string(s):
    """
    Returns the byte string ``s`` gzip-compressed (compression level 6).

    The data is compressed entirely in memory and the complete gzip stream
    is returned as a byte string.
    """
    import gzip
    try:
        from cStringIO import StringIO as _Buffer
    except ImportError:
        # cStringIO is not available on every interpreter; io.BytesIO is an
        # equivalent in-memory byte buffer.
        from io import BytesIO as _Buffer
    zbuf = _Buffer()
    zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
    try:
        zfile.write(s)
    finally:
        # Closing flushes the gzip trailer into zbuf, so it must happen
        # before the buffer is read -- and also if write() fails.
        zfile.close()
    return zbuf.getvalue()
178
# Matches a single character in the range U+0080 - U+FFFF.
ustring_re = re.compile(u"([\u0080-\uffff])")

def javascript_quote(s, quote_double_quotes=False):
    """
    Escapes ``s`` for use inside a single-quoted JavaScript string literal.

    Byte strings are decoded as UTF-8 first. Backslashes, carriage returns,
    newlines, tabs and single quotes are backslash-escaped, and characters
    matched by ``ustring_re`` are written as ``\\uXXXX`` escapes. If
    ``quote_double_quotes`` is true, double quotes are replaced by
    ``&quot;``. The result is returned as a ``str``.

    Raises TypeError if ``s`` is neither a ``str`` nor a ``unicode``
    instance.
    """

    def fix(match):
        return r"\u%04x" % ord(match.group(1))

    # isinstance() rather than an exact type comparison, so that subclasses
    # of str/unicode are accepted instead of being rejected with TypeError.
    if isinstance(s, str):
        s = s.decode('utf-8')
    elif not isinstance(s, unicode):
        raise TypeError(s)
    # The backslash must be escaped first so the escapes added below are not
    # themselves doubled.
    s = s.replace('\\', '\\\\')
    s = s.replace('\r', '\\r')
    s = s.replace('\n', '\\n')
    s = s.replace('\t', '\\t')
    s = s.replace("'", "\\'")
    if quote_double_quotes:
        s = s.replace('"', '&quot;')
    return str(ustring_re.sub(fix, s))
javascript_quote = allow_lazy(javascript_quote, unicode)
199
smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')
def smart_split(text):
    r"""
    Generator that splits a string by spaces, leaving quoted phrases together.
    Supports both single and double quotes, and supports escaping quotes with
    backslashes. In the output, strings will keep their initial and trailing
    quote marks. A quote character that stands alone as a token is yielded
    unchanged.

    >>> list(smart_split(r'This is "a person\'s" test.'))
    [u'This', u'is', u'"a person\\\'s"', u'test.']
    >>> list(smart_split(r"Another 'person\'s' test."))
    [u'Another', u"'person's'", u'test.']
    >>> list(smart_split(r'A "\"funky\" style" test.'))
    [u'A', u'""funky" style"', u'test.']
    >>> list(smart_split(r'A lone " quote'))
    [u'A', u'lone', u'"', u'quote']
    """
    text = force_unicode(text)
    for match in smart_split_re.finditer(text):
        bit = match.group(0)
        quote = bit[0]
        # The length check keeps a one-character token that is just a quote
        # mark from being treated as an empty quoted phrase and emitted
        # doubled.
        if len(bit) > 1 and quote in (u'"', u"'") and bit[-1] == quote:
            # Unescape the matching quote character first, then backslashes.
            inner = bit[1:-1].replace('\\' + quote, quote).replace('\\\\', '\\')
            yield quote + inner + quote
        else:
            yield bit
smart_split = allow_lazy(smart_split, unicode)
225
def _replace_entity(match):
    """
    Returns the character for the entity captured by ``_entity_re``.

    Handles decimal (``&#65;``) and hexadecimal (``&#x41;``) character
    references as well as named entities found in ``name2codepoint``. If the
    entity cannot be converted, the matched text is returned unchanged.
    """
    text = match.group(1)
    if text[0] == u'#':
        text = text[1:]
        try:
            if text[0] in u'xX':
                c = int(text[1:], 16)
            else:
                c = int(text)
            return unichr(c)
        except (ValueError, OverflowError):
            # ValueError: not a number, or outside the range unichr accepts.
            # OverflowError: the number is too large for unichr to convert.
            return match.group(0)
    else:
        try:
            return unichr(name2codepoint[text])
        except (ValueError, KeyError):
            return match.group(0)

_entity_re = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")

def unescape_entities(text):
    """
    Replaces the HTML character entities in ``text`` that can be converted
    with the characters they stand for; all others are left as they are.
    """
    return _entity_re.sub(_replace_entity, text)
unescape_entities = allow_lazy(unescape_entities, unicode)
Note: See TracBrowser for help on using the browser.