Context Navigation

Back to Ticket #7027

Ticket #7027: fix_tag_translation-refactored-re_7027.diff

File fix_tag_translation-refactored-re_7027.diff, 4.5 KB (added by mrts, 16 years ago)
Refactored the regex and wrote an explanation

django/utils/text.py

     return str(ustring_re.sub(fix, s))
 javascript_quote = allow_lazy(javascript_quote, unicode)
+smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')
+# There are three match blocks (A|B|C). A and B have identical structure,
+# differing only by the matched quotation marks. These blocks catch anything
+# in quotes as a single match group, optionally surrounded by `_()`.
+# C is the uninteresting, trivial case -- it matches any non-empty sequence
+# of non-space characters.
+#
+# Block A should be read as follows, where backticks (`...`) mark string
+# literals and NG stands for "non-grouping".
+#
+# Block B should be read the same way, substituting `'` for `"`.
+#
+# Reading of block A:
+#
+# (?:_\()? -- NG optional match of the ugettext start marker `_(` at the
+#             beginning; NG is needed because `_(` is a digraph
+#
+# `"` -- match string start marker
+#
+# (?:[^"\\]|\\.)+ -- NG match for a non-empty sequence of either
+#                    * any character except `"` or `\`,
+#                    * or digraph `\` followed by any single character
+#
+# `"` -- match string end marker
+#
+# \)? -- match the optional ugettext end marker `)` at the end
+#
+# Note that this lets through both `" ")` and `_(" "`; avoiding these
+# would make the regex needlessly complex.
+smart_split_re = re.compile(
+        r'((?:_\()?"(?:[^"\\]|\\.)+"\)?' # block A, for "foo bar"
+        r"|(?:_\()?'(?:[^'\\]|\\.)+'\)?" # block B, for 'foo bar'
+        r"|[^\s]+)" # block C, anything other without whitespace
+)
 def smart_split(text):
     r"""
     Generator that splits a string by spaces, leaving quoted phrases together.
     Supports both single and double quotes, and supports escaping quotes with
     backslashes. In the output, strings will keep their initial and trailing
     quote marks.
+    quote marks. Also, gettext markers '_(', ')' are preserved.
     >>> list(smart_split(r'This is "a person\'s" test.'))
     [u'This', u'is', u'"a person\\\'s"', u'test.']
         >>> list(smart_split(r"Another 'person\'s' test."))
+        >>> list(smart_split(r"Another 'person\'s' test."))
         [u'Another', u"'person's'", u'test.']
         >>> list(smart_split(r'A "\"funky\" style" test.'))
+        >>> list(smart_split(r'A "\"funky\" style" test.'))
         [u'A', u'""funky" style"', u'test.']
+    >>> list(smart_split(' _("my quoted string") '))
+    [u'_("my quoted string")']
+    >>> list(smart_split(" _('my quoted string') "))
+    [u"_('my quoted string')"]
     """
     text = force_unicode(text)
     for bit in smart_split_re.finditer(text):
         bit = bit.group(0)
+        if bit[0] == '"' and bit[-1] == '"':
+            yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"'
+        elif bit[0] == "'" and bit[-1] == "'":
+            yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'"
+        prefix, suffix = '', ''
+        start, end = 1, -1
+        if bit[0:2] == '_(' and bit[-1] == ')':
+            prefix, suffix = '_(', ')'
+            start, end = 3, -2
+        if (bit[0] == '"' and bit[-1] == '"'
+                or bit[0:3] == '_("' and bit[-2:] == '")'):
+            yield '%s"%s"%s' % (prefix,
+                    bit[start:end].replace(r'\"', '"').replace(r'\\', '\\'),
+                    suffix)
+        elif (bit[0] == "'" and bit[-1] == "'"
+                or bit[0:3] == "_('" and bit[-2:] == "')"):
+            yield "%s'%s'%s" % (prefix,
+                    bit[start:end].replace(r"\'", "'").replace(r'\\', '\\'),
+                    suffix)
         else:
             yield bit
 smart_split = allow_lazy(smart_split, unicode)

tests/regressiontests/text/tests.py

 [u'"a', u"'one"]
 >>> print list(smart_split(r'''all friends' tests'''))[1]
 friends'
+>>> list(smart_split(' _("my quoted string") '))
+[u'_("my quoted string")']
+>>> list(smart_split(" _('my quoted string') "))
+[u"_('my quoted string')"]
+>>> print list(smart_split(" _('my \"quoted\" string') "))[0]
+_('my "quoted" string')
 ### urlquote #############################################################
 >>> from django.utils.http import urlquote, urlquote_plus

Download in other formats:

Original Format

Issues

Context Navigation

Ticket #7027: fix_tag_translation-refactored-re_7027.diff

django/utils/text.py

tests/regressiontests/text/tests.py

Download in other formats:

Django Links

Learn More

Get Involved

Get Help

Follow Us

Support Us