Ticket #6271: smart_split.diff
File smart_split.diff, 2.5 KB (added 17 years ago) |
---|
-
django/utils/text.py
@@ -178,7 +178,6 @@
 ustring_re = re.compile(u"([\u0080-\uffff])")
 
 def javascript_quote(s, quote_double_quotes=False):
-
     def fix(match):
         return r"\u%04x" % ord(match.group(1))
 
@@ -196,7 +195,9 @@
     return str(ustring_re.sub(fix, s))
 javascript_quote = allow_lazy(javascript_quote, unicode)
 
-smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')
+smart_split_re = re.compile(r'("(?:[^"\\]*(?:\\.[^"\\]*)*)"'
+                            r"|'(?:[^'\\]*(?:\\.[^'\\]*)*)'"
+                            r'|[^\s"\']+|["\'])|\s+')
 def smart_split(text):
     """
     Generator that splits a string by spaces, leaving quoted phrases together.
@@ -205,16 +206,31 @@
     quote marks.
 
     >>> list(smart_split('This is "a person\'s" test.'))
-    ['This', 'is', '"a person\'s"', 'test.']
+    [u'This', u'is', u'"a person\'s"', u'test.']
+
+    Even if quoted content is found in the middle of a phrase, it is considered
+    part of the same phrase:
+
+    >>> text = '''with thelist|filter:'A B'|another:"Y Z" as var'''
+    >>> list(smart_split(text))
+    [u'with', u'thelist|filter:\'A B\'|another:"Y Z"', u'as', u'var']
     """
     text = force_unicode(text)
+    contents = []
     for bit in smart_split_re.finditer(text):
-        bit = bit.group(0)
-        if bit[0] == '"' and bit[-1] == '"':
-            yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"'
-        elif bit[0] == "'" and bit[-1] == "'":
-            yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'"
-        else:
-            yield bit
+        content = bit.group(1)
+        if content:
+            if content.startswith('"') and content.endswith('"'):
+                content = u'"%s"' % content[1:-1].replace('\\"', '"')\
+                                                 .replace('\\\\', '\\')
+            elif content.startswith("'") and content.endswith("'"):
+                content = u"'%s'" % content[1:-1].replace("\\'", "'")\
+                                                 .replace("\\\\", "\\")
+            contents.append(content)
+        elif contents:
+            yield ''.join(contents)
+            contents = []
+    if contents:
+        yield ''.join(contents)
 smart_split = allow_lazy(smart_split, unicode)
 