Django

Code

Ticket #6271: smart_split.diff

File smart_split.diff, 2.5 kB (added by SmileyChris, 1 year ago)

A rewrite of the smart_split generator so that it handles quoted phrases properly, as we need (including quoted content that appears in the middle of a phrase)

  • django/utils/text.py

    old new  
    178178ustring_re = re.compile(u"([\u0080-\uffff])") 
    179179 
    180180def javascript_quote(s, quote_double_quotes=False): 
    181  
    182181    def fix(match): 
    183182        return r"\u%04x" % ord(match.group(1)) 
    184183 
     
    196195    return str(ustring_re.sub(fix, s)) 
    197196javascript_quote = allow_lazy(javascript_quote, unicode) 
    198197 
    199 smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)') 
     198smart_split_re = re.compile(r'("(?:[^"\\]*(?:\\.[^"\\]*)*)"' 
     199                            r"|'(?:[^'\\]*(?:\\.[^'\\]*)*)'" 
     200                            r'|[^\s"\']+|["\'])|\s+') 
    200201def smart_split(text): 
    201202    """ 
    202203    Generator that splits a string by spaces, leaving quoted phrases together. 
     
    205206    quote marks. 
    206207 
    207208    >>> list(smart_split('This is "a person\'s" test.')) 
    208     ['This', 'is', '"a person\'s"', 'test.'] 
     209    [u'This', u'is', u'"a person\'s"', u'test.'] 
     210 
     211    Even if quoted content is found in the middle of a phrase, it is considered 
     212    part of the same phrase: 
     213 
     214    >>> text = '''with thelist|filter:'A B'|another:"Y Z" as var''' 
     215    >>> list(smart_split(text)) 
     216    [u'with', u'thelist|filter:\'A B\'|another:"Y Z"', u'as', u'var'] 
    209217    """ 
    210218    text = force_unicode(text) 
     219    contents = [] 
    211220    for bit in smart_split_re.finditer(text): 
    212         bit = bit.group(0) 
    213         if bit[0] == '"' and bit[-1] == '"': 
    214             yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"' 
    215         elif bit[0] == "'" and bit[-1] == "'": 
    216             yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'" 
    217         else: 
    218             yield bit 
     221        content = bit.group(1) 
     222        if content: 
     223            if content.startswith('"') and content.endswith('"'): 
     224                content = u'"%s"' % content[1:-1].replace('\\"', '"')\ 
     225                                                 .replace('\\\\', '\\') 
     226            elif content.startswith("'") and content.endswith("'"): 
     227                content = u"'%s'" % content[1:-1].replace("\\'", "'")\ 
     228                                                 .replace("\\\\", "\\") 
     229            contents.append(content) 
     230        elif contents: 
     231            yield ''.join(contents) 
     232            contents = [] 
     233    if contents: 
     234        yield ''.join(contents) 
    219235smart_split = allow_lazy(smart_split, unicode) 
    220236