Index: /home/tobryan1/workspace/django/django/core/urlresolvers.py
===================================================================
--- /home/tobryan1/workspace/django/django/core/urlresolvers.py (revision 5478)
+++ /home/tobryan1/workspace/django/django/core/urlresolvers.py (working copy)
@@ -27,6 +27,18 @@
         return callback, ''
     return callback[:dot], callback[dot+1:]
 
+ESCAPE_CHARS = re.compile(r'([(){}|[\].^$*+?\\])')
+
+def escape(s):
+    """
+    Backslash-escapes every regex metacharacter in s (see ESCAPE_CHARS).
+    """
+    return ESCAPE_CHARS.sub(r'\\\1', s)
+
+
+CHAR_CLASS_NEEDS_BACKSLASH = re.compile(r'\[([(){}|[\].^$*+?])\]')
+CHAR_CLASS_NO_BACKSLASH = re.compile(r'\[(.)\]')
+
 def reverse_helper(regex, *args, **kwargs):
     """
     Does a "reverse" lookup -- returns the URL for the given args/kwargs.
@@ -39,12 +51,73 @@
         'places/3/'
         >>> reverse_helper(re.compile('^people/(?P<state>\w\w)/(\w+)/$'), 'adrian', state='il')
         'people/il/adrian/'
-
+        >>> reverse_helper(re.compile(r'^prices/less_than_\$(?P<price>\d+)/$'), price='10')
+        'prices/less_than_$10/'
+        >>> reverse_helper(re.compile(r'^prices/less_than_[$](?P<price>\d+)/$'), price='10')
+        'prices/less_than_$10/'
+        >>> reverse_helper(re.compile(r'^headlines/(?P<year>\d+)\.(?P<month>\d+)\.(?P<day>\d+)/$'), year=2007, month=5, day=21)
+        'headlines/2007.5.21/'
+        >>> reverse_helper(re.compile(r'^priests/(?P<name>\w+)\+/$'), name='maynard')
+        'priests/maynard+/'
+        >>> reverse_helper(re.compile(r'^windows_path/(?P<drive_name>[A-Z]):\\\\(?P<path>.+)/$'), drive_name='C', path=r'Documents and Settings\\spam')
+        'windows_path/C:\\\\Documents and Settings\\\\spam/'
+        >>> reverse_helper(re.compile(r'\\Aexpr\\\\b/expr2\\b\\\\Z/$'))
+        'expr\\\\b/expr2\\\\Z/'
+        >>> reverse_helper(re.compile(r'^(?P<name>[^/]+)/\\d+/$'), name='john')
+        Traceback (most recent call last):
+            ...
+        NoReverseMatch: \d must be replaced by an argument in reverse lookup
+
     Raises NoReverseMatch if the args/kwargs aren't valid for the regex.
-    """
+    """
     # TODO: Handle nested parenthesis in the following regex.
-    result = re.sub(r'\(([^)]+)\)', MatchChecker(args, kwargs), regex.pattern)
-    return result.replace('^', '').replace('$', '')
+    # Build the checker once and reuse it for every group, as the replaced
+    # code did; a fresh instance per match would lose any state it keeps.
+    checker = MatchChecker(args, kwargs)
+    result = re.sub(r'\(([^)]+)\)', lambda m: escape(checker(m)), regex.pattern)
+    # TODO: octal characters make things even more complicated
+    # you can use a single character class to avoid escaping, e.g. [$] or [.].
+    # normalize to backslash followed by character
+    result = CHAR_CLASS_NEEDS_BACKSLASH.sub(r'\\\1', result)
+    # you can put a single character in brackets (though why you would is
+    # beyond me); removes the brackets
+    result = CHAR_CLASS_NO_BACKSLASH.sub(r'\1', result)
+    # \A, \Z, \b, and \B match the empty string and should be removed, but
+    # only if preceded by an odd number of backslashes, otherwise the backslash
+    # right before is actually the second backslash in the backslash escape \\
+    def delete_if_slashes_odd(m):
+        slashes = m.group('slashes')
+        if len(slashes) % 2:
+            # drop the anchor and its own backslash; keep every escaped
+            # backslash pair that precedes it
+            return slashes[:-1]
+        else:
+            return m.group(0)
+    result = re.sub(r'(?P<slashes>\\+)(?P<char>[AbBZ])', delete_if_slashes_odd, result)
+    # ^ and $ match the empty string and should be removed, but only if
+    # preceded by an even number of backslashes (including none), otherwise the
+    # backslash right before is escaping the literal \^ or \$
+    def delete_if_slashes_even(m):
+        slashes = m.group('slashes')
+        if len(slashes) % 2 == 0:
+            return slashes
+        else:
+            return m.group(0)
+    result = re.sub(r'(?P<slashes>\\*)(?P<char>[$^])', delete_if_slashes_even, result)
+    # many characters are preceded by backslashes in regexes if the literal
+    # character is meant; as we go to a string, the backslash should go away.
+    # We should never find character classes that don't have a single
+    # replacement character. These are \number (the group matching expression),
+    # \d, \D, \s, \S, \w, and \W. If we find these at this point, we raise
+    # an exception.
+    def drop_backslash_if_valid(m):
+        char = m.group(1)
+        if re.match(r'[\ddDsSwW]', char):
+            raise NoReverseMatch(r'\%s must be replaced by an argument in reverse lookup' % char)
+        else:
+            return char
+    result = re.sub(r'\\([[\]{}()^$*+?.\\|\ddDsSwW])', drop_backslash_if_valid, result)
+    return result
 
 class MatchChecker(object):
     "Class used in reverse RegexURLPattern lookup."