Ticket #7027: fix_tag_translation-refactored-re-strict_gettext_matching_7027.diff

File fix_tag_translation-refactored-re-strict_gettext_matching_7027.diff, 4.5 KB (added by mrts, 16 years ago)

This version won't let mismatched markers such as `" ")` and `_(" "` through as a single token; IMHO it is not worth the extra complexity, though.

  • django/utils/text.py

     
    197197    return str(ustring_re.sub(fix, s))
    198198javascript_quote = allow_lazy(javascript_quote, unicode)
    199199
    200 smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')
     200# There are three match blocks (A|B|C). A and B have identical structure,
     201# differing only by the matched quotation marks. These blocks catch anything
     202# in quotes as a single match group, optionally surrounded by `_()`.
     203# C is the uninteresting, trivial case -- it matches any non-empty sequence
     204# of non-space characters.
     205#
     206# Block A should be read as follows, where backticks mark string literals
     207# and NG stands for "non-grouping".
     208#
     209# Block B should be read the same way, with `'` in place of `"`.
     210#
     211# Reading of block A:
     212#
     213# (?:_\() -- NG match for the ugettext start marker `_(` in the beginning,
     214#            NG needed as `_(` is a digraph
     215#
     216# `"` -- match string start marker
     217#
     218# (?:[^"\\]|\\.)+ -- NG match for a non-empty sequence of either
     219#                    * any character except `"` or `\`,
     220#                    * or digraph `\` followed by any single character
     221#
     222# `"` -- match string end marker
     223#
     224# \) -- match the ugettext end marker `)` in the end
     225# (each block has a second alternative without `_(` and `)` for bare strings)
     226# This one won't let mismatched `" ")` and `_(" "` through as one token.
     227smart_split_re = re.compile(
     228        r'((?:_\()"(?:[^"\\]|\\.)+"\)' # block A, for _("foo bar")
     229        r'|"(?:[^"\\]|\\.)+"' # block A, for "foo bar"
     230        r"|(?:_\()'(?:[^'\\]|\\.)+'\)" # block B, for _('foo bar')
     231        r"|'(?:[^'\\]|\\.)+'" # block B, for 'foo bar'
     232        r"|[^\s]+)" # block C, anything other without whitespace
     233)
    201234def smart_split(text):
    202235    r"""
    203236    Generator that splits a string by spaces, leaving quoted phrases together.
    204237    Supports both single and double quotes, and supports escaping quotes with
    205238    backslashes. In the output, strings will keep their initial and trailing
    206     quote marks.
     239    quote marks. Also, gettext markers '_(', ')' are preserved.
    207240
    208241    >>> list(smart_split(r'This is "a person\'s" test.'))
    209242    [u'This', u'is', u'"a person\\\'s"', u'test.']
    210         >>> list(smart_split(r"Another 'person\'s' test.")) 
     243        >>> list(smart_split(r"Another 'person\'s' test."))
    211244        [u'Another', u"'person's'", u'test.']
    212         >>> list(smart_split(r'A "\"funky\" style" test.')) 
     245        >>> list(smart_split(r'A "\"funky\" style" test.'))
    213246        [u'A', u'""funky" style"', u'test.']
     247    >>> list(smart_split(' _("my quoted string") '))
     248    [u'_("my quoted string")']
     249    >>> list(smart_split(" _('my quoted string') "))
     250    [u"_('my quoted string')"]
    214251    """
    215252    text = force_unicode(text)
    216253    for bit in smart_split_re.finditer(text):
    217254        bit = bit.group(0)
    218         if bit[0] == '"' and bit[-1] == '"':
    219             yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"'
    220         elif bit[0] == "'" and bit[-1] == "'":
    221             yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'"
     255        prefix, suffix = '', ''
     256        start, end = 1, -1
     257        if bit[0:2] == '_(' and bit[-1] == ')':
     258            prefix, suffix = '_(', ')'
     259            start, end = 3, -2
     260        if (bit[0] == '"' and bit[-1] == '"'
     261                or bit[0:3] == '_("' and bit[-2:] == '")'):
     262            yield '%s"%s"%s' % (prefix,
     263                    bit[start:end].replace(r'\"', '"').replace(r'\\', '\\'),
     264                    suffix)
     265        elif (bit[0] == "'" and bit[-1] == "'"
     266                or bit[0:3] == "_('" and bit[-2:] == "')"):
     267            yield "%s'%s'%s" % (prefix,
     268                    bit[start:end].replace(r"\'", "'").replace(r'\\', '\\'),
     269                    suffix)
    222270        else:
    223271            yield bit
    224272smart_split = allow_lazy(smart_split, unicode)
  • tests/regressiontests/text/tests.py

     
    1515[u'"a', u"'one"]
    1616>>> print list(smart_split(r'''all friends' tests'''))[1]
    1717friends'
     18>>> list(smart_split(' _("my quoted string") '))
     19[u'_("my quoted string")']
     20>>> list(smart_split(" _('my quoted string') "))
     21[u"_('my quoted string')"]
     22>>> print list(smart_split(" _('my \"quoted\" string') "))[0]
     23_('my "quoted" string')
    1824
    1925### urlquote #############################################################
    2026>>> from django.utils.http import urlquote, urlquote_plus
Back to Top