200 | | smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)') |
| 200 | # There are three match blocks (A|B|C). A and B have identical structure, |
| 201 | # differing only by the matched quotation marks. These blocks catch anything |
| 202 | # in quotes as a single match group, optionally surrounded by `_()`. |
| 203 | # C is the uninteresting, trivial case -- it matches any non-empty sequence |
| 204 | # of non-space characters. |
| 205 | # |
| 206 | # Block A should be read as follows, where `` mark string literals and |
| 207 | # NG stands for "non-grouping". |
| 208 | # |
| 209 | # Block B should be read the same way, with `'` in place of `"`. |
| 210 | # |
| 211 | # Reading of block A: |
| 212 | # |
| 213 | # (?:_\()? -- NG optional match of the ugettext start marker `_(` at the beginning; |
| 214 | # NG is needed because `_(` is a digraph |
| 215 | # |
| 216 | # `"` -- match string start marker |
| 217 | # |
| 218 | # (?:[^"\\]|\\.)* -- NG match for a possibly empty sequence of either |
| 219 | # * any character except `"` or `\`, |
| 220 | # * or digraph `\` followed by any single character |
| 221 | # |
| 222 | # `"` -- match string end marker |
| 223 | # |
| 224 | # \)? -- match the optional ugettext end marker `)` at the end |
| 225 | # |
| 226 | # Note that this lets through both `" ")` and `_(" "`; avoiding these |
| 227 | # would make the regex needlessly complex. |
| 228 | smart_split_re = re.compile( |
| 229 | r'((?:_\()?"(?:[^"\\]|\\.)*"\)?' # block A, for "foo bar" |
| 230 | r"|(?:_\()?'(?:[^'\\]|\\.)*'\)?" # block B, for 'foo bar' |
| 231 | r"|[^\s]+)" # block C, anything other without whitespace |
| 232 | ) |
218 | | if bit[0] == '"' and bit[-1] == '"': |
219 | | yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"' |
220 | | elif bit[0] == "'" and bit[-1] == "'": |
221 | | yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'" |
| 254 | prefix, suffix = '', '' |
| 255 | start, end = 1, -1 |
| 256 | if bit[0:2] == '_(' and bit[-1] == ')': |
| 257 | prefix, suffix = '_(', ')' |
| 258 | start, end = 3, -2 |
| 259 | if (bit[0] == '"' and bit[-1] == '"' |
| 260 | or bit[0:3] == '_("' and bit[-2:] == '")'): |
| 261 | yield '%s"%s"%s' % (prefix, |
| 262 | bit[start:end].replace(r'\"', '"').replace(r'\\', '\\'), |
| 263 | suffix) |
| 264 | elif (bit[0] == "'" and bit[-1] == "'" |
| 265 | or bit[0:3] == "_('" and bit[-2:] == "')"): |
| 266 | yield "%s'%s'%s" % (prefix, |
| 267 | bit[start:end].replace(r"\'", "'").replace(r'\\', '\\'), |
| 268 | suffix) |