Ticket #2977: new_reverse_urlresolver.3.patch

File new_reverse_urlresolver.3.patch, 12.5 KB (added by Chris Beaven, 17 years ago)
  • django/core/urlresolvers.py

     
    1111from django.core.exceptions import ImproperlyConfigured, ViewDoesNotExist
    1212import re
    1313
     14re_bracket = re.compile(r'(?<!\\)((?:\\\\)*)([()])')  # Open or close bracket not preceded by a single backslash
     15re_type = type(re_bracket)
     16
     17character_set_pattern = (r'(?<!\\)((?:\\\\)*)\[((?:(?!\\)(?:\\\\)*\]|[^\]])+)\]')
     18re_unescape = re.compile(r'\\(.)|[$?*+^()]|%s' % character_set_pattern)
     19def unescape(value):
     20    """
     21    Unescape a regex string.
     22   
     23    Removes any re characters used for start/end/repetition, unescapes any
     24    escaped characters, and replaces character sets with the first matching
     25    character in that set.
     26    """
     27    def repl(m):
     28        escaped, slashes, contents = m.groups()
     29        if contents:
     30            # We're in a character set.
     31            if contents[0] == '\\':
     32                contents = contents[:2]
     33            else:
     34                contents = contents[:1]
     35            return '%s%s' % (slashes, contents)
     36        elif escaped and re.match(r'[\ddDsSwW]', escaped):
     37            # These cases shouldn't ever come up - no match possible if they do.
     38            raise ValueError(r"Regular expression notation '\%s' was outside of a group so this pattern is not reversable" % escaped)
     39        elif escaped and escaped in 'AZbB':
     40            # These cases should just return nothing.
     41            return ''
     42        # For every other case: if it's the escaped version then return it without
     43        # a slash, otherwise return nothing.
     44        return escaped or ''
     45    return re_unescape.sub(repl, value)
     46
    1447class Resolver404(Http404):
    1548    pass
    1649
     
    4275
    4376    Raises NoReverseMatch if the args/kwargs aren't valid for the regex.
    4477    """
    45     # TODO: Handle nested parenthesis in the following regex.
    46     result = re.sub(r'\(([^)]+)\)', MatchChecker(args, kwargs), regex.pattern)
    47     return result.replace('^', '').replace('$', '')
     78    # Regex can either be a string or a regular expression.
     79    if isinstance(regex, re_type):
     80        regex = regex.pattern
     81    return ReverseRegexLookup(regex).check(args, kwargs)
    4882
    49 class MatchChecker(object):
    50     "Class used in reverse RegexURLPattern lookup."
    51     def __init__(self, args, kwargs):
    52         self.args, self.kwargs = args, kwargs
    53         self.current_arg = 0
     83def tokenize(text):
     84    """
     85    Recursive tokenizer for regular expression parentheses.
     86    """
     87    def parse(text, top=True, named_group=False):
     88        bits = []
     89        m = re_bracket.search(text)
     90        while m:
     91            before, text = text[:m.start()+len(m.group(1))], text[m.end():]
     92            if before:
     93                bits.append(before)
     94            if m.group(2) != '(':
     95                break
     96            inner_bits, text, named_group = parse(text, top=False, named_group=not top and named_group)
     97            if inner_bits:
     98                inline = named_group
     99                first_bit = inner_bits[0]
     100                if isinstance(first_bit, str):
     101                    if first_bit.startswith('?'):
     102                        # Regex extension notation.
     103                        if first_bit.startswith('?:'):
     104                            # No need to parse this non-grouping parenthesis.
     105                            inline = True
     106                            inner_bits[0] = first_bit[2:]
     107                        elif first_bit.startswith('?P'):
     108                            # Named group, set variable so higher levels will flatten.
     109                            named_group = True
     110                        else:
     111                            # Skip all other extension notation.
     112                            inner_bits = None
     113            if inner_bits:
     114                if inline:
     115                    bits.extend(inner_bits)
     116                else:
     117                    bits.append(inner_bits)
     118            m = re_bracket.search(text)
     119        return bits, text, named_group
    54120
    55     def __call__(self, match_obj):
    56         # match_obj.group(1) is the contents of the parenthesis.
    57         # First we need to figure out whether it's a named or unnamed group.
    58         #
    59         grouped = match_obj.group(1)
    60         m = re.search(r'^\?P<(\w+)>(.*?)$', grouped)
    61         if m: # If this was a named group...
    62             # m.group(1) is the name of the group
    63             # m.group(2) is the regex.
     121    def build_re(bits):
     122        output = []
     123        for bit in bits:
     124            if isinstance(bit, list):
     125                bit = build_re(bit)
     126            output.append(bit)
     127        return '(%s)' % ''.join(output)
     128
     129    bits, text, named_group = parse(text)
     130    if text:
     131        bits.append(text)
     132    # Now tokenize the bits. Each token will either be a string or a regex.
     133    tokens = []
     134    count = 0
     135    error = None
     136    for bit in bits:
     137        if isinstance(bit, list):
     138            # Build the regex here so it only has to be compiled once.
     139            bit = re.compile('%s$' % build_re(bit))
     140            count += 1
     141        else:
     142            # Unescape special characters which could possibly be used in a URL
     143            # and strip unused regular expression syntax.
    64144            try:
    65                 value = self.kwargs[m.group(1)]
    66             except KeyError:
    67                 # It was a named group, but the arg was passed in as a
    68                 # positional arg or not at all.
    69                 try:
    70                     value = self.args[self.current_arg]
    71                     self.current_arg += 1
    72                 except IndexError:
    73                     # The arg wasn't passed in.
    74                     raise NoReverseMatch('Not enough positional arguments passed in')
    75             test_regex = m.group(2)
    76         else: # Otherwise, this was a positional (unnamed) group.
    77             try:
    78                 value = self.args[self.current_arg]
    79                 self.current_arg += 1
    80             except IndexError:
    81                 # The arg wasn't passed in.
    82                 raise NoReverseMatch('Not enough positional arguments passed in')
    83             test_regex = grouped
    84         # Note we're using re.match here on purpose because the start of
    85         # to string needs to match.
    86         if not re.match(test_regex + '$', str(value)): # TODO: Unicode?
    87             raise NoReverseMatch("Value %r didn't match regular expression %r" % (value, test_regex))
    88         return str(value) # TODO: Unicode?
     145                bit = unescape(bit)
     146            except ValueError, err:
     147                error = err
     148                break
     149        tokens.append(bit)
     150    return tokens, count, error
    89151
     152class ReverseRegexLookup(object):
     153    def __init__(self, text):
     154        self.tokens, self.minimum_arguments, self.error = tokenize(text)
     155
     156    def check(self, args=[], kwargs={}):
     157        # Note: args and kwargs are copied below before any .pop() calls, so
     158        # the caller's own list and dict are left untouched.
     159        if self.error:
     160            raise NoReverseMatch(self.error)
     161        if self.minimum_arguments > len(args) + len(kwargs):
     162            raise NoReverseMatch('Not enough arguments passed in')
     163        match = []
     164        args = list(args)
     165        kwargs = kwargs.copy()
     166        for token in self.tokens:
     167            if isinstance(token, re_type):   # A regex token.
     168                value = None
     169                # Is it a named argument?
     170                if token.groupindex:
     171                    try:
     172                        value = kwargs.pop(token.groupindex.keys()[0])
     173                    except KeyError:
     174                        # It was a named group, but the arg was passed in as a
     175                        # positional arg or not at all.
     176                        pass
     177                if value is None:
     178                    try:
     179                        value = args.pop(0)
     180                    except IndexError:
     181                        # The arg wasn't passed in.
     182                        raise NoReverseMatch('Not enough positional arguments passed in')
     183                value = str(value)   # TODO: Unicode?
     184                if not token.match(value):
     185                    raise NoReverseMatch("Value %r didn't match regular expression %r" % (value, token.pattern))
     186                match.append(value)
     187            else:    # A string token.
     188                match.append(token)
     189        match = ''.join(match)
     190        return match
     191
    90192class RegexURLPattern(object):
    91193    def __init__(self, regex, callback, default_args=None, name=None):
    92194        # regex is a string representing a regular expression.
     
    94196        # which represents the path to a module and a view function name, or a
    95197        # callable object (view).
    96198        self.regex = re.compile(regex)
     199        self.reverse_regex_lookup = ReverseRegexLookup(regex)
    97200        if callable(callback):
    98201            self._callback = callback
    99202        else:
     
    150253        return self.reverse_helper(*args, **kwargs)
    151254
    152255    def reverse_helper(self, *args, **kwargs):
    153         return reverse_helper(self.regex, *args, **kwargs)
     256        return self.reverse_regex_lookup.check(args, kwargs)
    154257
    155258class RegexURLResolver(object):
    156259    def __init__(self, regex, urlconf_name, default_kwargs=None):
    157260        # regex is a string representing a regular expression.
    158261        # urlconf_name is a string representing the module containing urlconfs.
    159262        self.regex = re.compile(regex)
     263        self.reverse_regex_lookup = ReverseRegexLookup(regex)
    160264        self.urlconf_name = urlconf_name
    161265        self.callback = None
    162266        self.default_kwargs = default_kwargs or {}
     
    230334        raise NoReverseMatch
    231335
    232336    def reverse_helper(self, lookup_view, *args, **kwargs):
     337        result = self.reverse_regex_lookup.check(args, kwargs)
     338        # .check() swallows used args, so the resolver is checking both itself
     339        # and its children using the one set of arguments.
    233340        sub_match = self.reverse(lookup_view, *args, **kwargs)
    234         result = reverse_helper(self.regex, *args, **kwargs)
    235341        return result + sub_match
    236342
    237343def resolve(path, urlconf=None):
  • tests/regressiontests/urlpatterns_reverse/tests.py

     
    2323    ('^people/(?P<state>\w\w)/(?P<name>\w+)/$', NoReverseMatch, [], {'name': 'adrian'}),
    2424    ('^people/(?P<state>\w\w)/(\w+)/$', NoReverseMatch, ['il'], {'name': 'adrian'}),
    2525    ('^people/(?P<state>\w\w)/(\w+)/$', 'people/il/adrian/', ['adrian'], {'state': 'il'}),
     26
     27    ('^places?/$', 'places/', [], {}),
     28    ('^places+/$', 'places/', [], {}),
     29    ('^places*/$', 'places/', [], {}),
     30    ('^people/(?:name/)', 'people/name/', [], {}),
     31    ('^people/(?:name/)?', 'people/name/', [], {}),
     32    ('^character_set/[abcdef0-9]/$', 'character_set/a/', [], {}),
     33    (r'^people/((?P<state>\w\w)/test)?/(\w+)/$', 'people/il/test/adrian/', ['adrian'], {'state': 'il'}),
     34    (r'^people/((?P<state>\w\w)/test)?/(\w+)/$', NoReverseMatch, ['adrian'], {}),
     35
     36    (r'^places/(\d+|[a-z_]+)/', 'places/4/', [4], {}),
     37    (r'^places/(\d+|[a-z_]+)/', 'places/harlem/', ['harlem'], {}),
     38    (r'^places/(\d+|[a-z_]+)/', NoReverseMatch, ['harlem64'], {}),
     39
     40    (r'^price/\$(\d+)/$', 'price/$10/', ['10'], {}),
     41    (r'^price/[$](\d+)/$', 'price/$10/', ['10'], {}),
     42    (r'^price/[\$](\d+)/$', 'price/$10/', ['10'], {}),
     43    (r'^product/(?P<product>\w+)\+\(\$(?P<price>\d+(\.\d+)?)\)/$', 'product/chocolate+($2.00)/', ['2.00'], {'product': 'chocolate'}),
     44    (r'^headlines/(?P<year>\d+)\.(?P<month>\d+)\.(?P<day>\d+)/$', 'headlines/2007.5.21/', [], dict(year=2007, month=5, day=21)),
     45    (r'^windows_path/(?P<drive_name>[A-Z]):\\(?P<path>.+)/$', r'windows_path/C:\Documents and Settings\spam/', [], dict(drive_name='C', path=r'Documents and Settings\spam')),
     46    (r'^special_chars/(.+)/$', r'special_chars/+\$*/', [r'+\$*'], {}),
     47    (r'^(?P<name>.+)/\d+/$', NoReverseMatch, [], dict(name='john')),
    2648)
    2749
    2850class URLPatternReverse(unittest.TestCase):
    2951    def test_urlpattern_reverse(self):
    3052        for regex, expected, args, kwargs in test_data:
    3153            try:
    32                 got = reverse_helper(re.compile(regex), *args, **kwargs)
     54                got = reverse_helper(regex, *args, **kwargs)
    3355            except NoReverseMatch, e:
    3456                self.assertEqual(expected, NoReverseMatch)
    3557            else:
    3658                self.assertEquals(got, expected)
    3759
    3860if __name__ == "__main__":
    39     run_tests(1)
     61    unittest.main()
Back to Top