Ticket #6271: smart_split.diff

File smart_split.diff, 2.5 KB (added by Chris Beaven, 16 years ago)

A rewrite of the smart_split generator so that it handles quoted phrases the way we need: quoted content found in the middle of a phrase is kept as part of that phrase.

  • django/utils/text.py

     
    178178ustring_re = re.compile(u"([\u0080-\uffff])")
    179179
    180180def javascript_quote(s, quote_double_quotes=False):
    181 
    182181    def fix(match):
    183182        return r"\u%04x" % ord(match.group(1))
    184183
     
    196195    return str(ustring_re.sub(fix, s))
    197196javascript_quote = allow_lazy(javascript_quote, unicode)
    198197
    199 smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')
     198smart_split_re = re.compile(r'("(?:[^"\\]*(?:\\.[^"\\]*)*)"'
     199                            r"|'(?:[^'\\]*(?:\\.[^'\\]*)*)'"
     200                            r'|[^\s"\']+|["\'])|\s+')
    200201def smart_split(text):
    201202    """
    202203    Generator that splits a string by spaces, leaving quoted phrases together.
     
    205206    quote marks.
    206207
    207208    >>> list(smart_split('This is "a person\'s" test.'))
    208     ['This', 'is', '"a person\'s"', 'test.']
     209    [u'This', u'is', u'"a person\'s"', u'test.']
     210
     211    Even if quoted content is found in the middle of a phrase, it is considered
     212    part of the same phrase:
     213
     214    >>> text = '''with thelist|filter:'A B'|another:"Y Z" as var'''
     215    >>> list(smart_split(text))
     216    [u'with', u'thelist|filter:\'A B\'|another:"Y Z"', u'as', u'var']
    209217    """
    210218    text = force_unicode(text)
     219    contents = []
    211220    for bit in smart_split_re.finditer(text):
    212         bit = bit.group(0)
    213         if bit[0] == '"' and bit[-1] == '"':
    214             yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"'
    215         elif bit[0] == "'" and bit[-1] == "'":
    216             yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'"
    217         else:
    218             yield bit
     221        content = bit.group(1)
     222        if content:
     223            if content.startswith('"') and content.endswith('"'):
     224                content = u'"%s"' % content[1:-1].replace('\\"', '"')\
     225                                                 .replace('\\\\', '\\')
     226            elif content.startswith("'") and content.endswith("'"):
     227                content = u"'%s'" % content[1:-1].replace("\\'", "'")\
     228                                                 .replace("\\\\", "\\")
     229            contents.append(content)
     230        elif contents:
     231            yield ''.join(contents)
     232            contents = []
     233    if contents:
     234        yield ''.join(contents)
    219235smart_split = allow_lazy(smart_split, unicode)
    220236
Back to Top