Ticket #2977: new_reverse_urlresolver.patch

File new_reverse_urlresolver.patch, 10.9 KB (added by Chris Beaven, 17 years ago)

complete rewrite of most of the reverse code!

  • django/core/urlresolvers.py

     
    1111from django.core.exceptions import ImproperlyConfigured, ViewDoesNotExist
    1212import re
    1313
     14# Set up these regular expressions outside the function so they only have to
     15# be compiled once.
     16re_bracket = re.compile(r'(?<!\\)(\(|\))')  # Open or close bracket not preceded by a backslash
     17re_has_named_group = re.compile(r'(?<!\\)\(\?P')
     18re_type = type(re_bracket)
     19re_unused = re.compile(r'(?<!\\)[$?*+^()]')
     20re_special = re.compile(r'\\([.+*()$])')  # Characters from the IETF URL standard, RFC 1738.
     21
    1422class Resolver404(Http404):
    1523    pass
    1624
     
    4250
    4351    Raises NoReverseMatch if the args/kwargs aren't valid for the regex.
    4452    """
    45     # TODO: Handle nested parenthesis in the following regex.
    46     result = re.sub(r'\(([^)]+)\)', MatchChecker(args, kwargs), regex.pattern)
    47     return result.replace('^', '').replace('$', '')
     53    # Regex can either be a string or a compiled regular expression.
     54    if isinstance(regex, re_type):
     55        regex = regex.pattern
     56    return ReverseRegexLookup(regex).check(args, kwargs)
    4857
    49 class MatchChecker(object):
    50     "Class used in reverse RegexURLPattern lookup."
    51     def __init__(self, args, kwargs):
    52         self.args, self.kwargs = args, kwargs
    53         self.current_arg = 0
     58def build_re(bits):
     59    output = []
     60    for bit in bits:
     61        if isinstance(bit, list):
     62            bit = build_re(bit, top=False)
     63        output.append(bit)
     64    return '(%s)' % ''.join(output)
    5465
    55     def __call__(self, match_obj):
    56         # match_obj.group(1) is the contents of the parenthesis.
    57         # First we need to figure out whether it's a named or unnamed group.
    58         #
    59         grouped = match_obj.group(1)
    60         m = re.search(r'^\?P<(\w+)>(.*?)$', grouped)
    61         if m: # If this was a named group...
    62             # m.group(1) is the name of the group
    63             # m.group(2) is the regex.
    64             try:
    65                 value = self.kwargs[m.group(1)]
    66             except KeyError:
    67                 # It was a named group, but the arg was passed in as a
    68                 # positional arg or not at all.
    69                 try:
    70                     value = self.args[self.current_arg]
    71                     self.current_arg += 1
    72                 except IndexError:
    73                     # The arg wasn't passed in.
    74                     raise NoReverseMatch('Not enough positional arguments passed in')
    75             test_regex = m.group(2)
    76         else: # Otherwise, this was a positional (unnamed) group.
    77             try:
    78                 value = self.args[self.current_arg]
    79                 self.current_arg += 1
    80             except IndexError:
    81                 # The arg wasn't passed in.
    82                 raise NoReverseMatch('Not enough positional arguments passed in')
    83             test_regex = grouped
    84         # Note we're using re.match here on purpose because the start of
    85         # the string needs to match.
    86         if not re.match(test_regex + '$', str(value)): # TODO: Unicode?
    87             raise NoReverseMatch("Value %r didn't match regular expression %r" % (value, test_regex))
    88         return str(value) # TODO: Unicode?
     66class ReverseRegexLookup(object):
     67    def __init__(self, text):
     68        self.has_named_groups = bool(re_has_named_group.search(text))
     69        self._tokenize(text)
    8970
     71    def _tokenize(self, text):
     72        # Recursive tokenizer for regular expression parentheses.
     73        def parse(text):
     74            bits = []
     75            m = re_bracket.search(text)
     76            while m:
     77                before, text = text[:m.start()], text[m.end():]
     78                if before:
     79                    bits.append(before)
     80                if m.group(1) != '(':
     81                    break
     82                inner_bits, text = parse(text)
     83                if inner_bits:
     84                    inline = self.has_named_groups
     85                    skip = False
     86                    first_bit = inner_bits[0]
     87                    if isinstance(first_bit, str):
     88                        if first_bit.startswith('?'):
     89                            # Regex extension notation.
     90                            if first_bit.startswith('?:'):
     91                                # No need to parse this non-grouping parenthesis.
     92                                inline = True
     93                                inner_bits[0] = first_bit[2:]
     94                            elif first_bit.startswith('?P'):
     95                                inline = False
     96                            else:
     97                                # Skip all other extension notation.
     98                                skip = True
     99                else:
     100                    skip = True
     101                if not skip:
     102                    if inline:
     103                        bits.extend(inner_bits)
     104                    else:
     105                        bits.append(inner_bits)
     106                m = re_bracket.search(text)
     107            return bits, text
     108        self.minimum_arguments = 0
     109        bits, text = parse(text)
     110        if text:
     111            bits.append(text)
     112        # Now tokenize the bits. Each token will either be a string or a regex.
     113        tokens = []
     114        for bit in bits:
     115            if isinstance(bit, list):
     116                # We're building the regex here so it only has to be compiled
     117                # once.
     118                bit = re.compile('%s$' % build_re(bit))
     119            tokens.append(bit)
     120        self.tokens = tokens
     121
     122    def check(self, args=[], kwargs={}):
     123        # Note: args and kwargs are copied below before being consumed (using
     124        # .pop()), so the caller's originals are left untouched.
     125        if self.minimum_arguments > len(args) + len(kwargs):
     126            raise NoReverseMatch('Not enough arguments passed in')
     127        match = []
     128        args = list(args)
     129        kwargs = kwargs.copy()
     130        for token in self.tokens:
     131            if isinstance(token, re_type):   # A regex token.
     132                value = None
     133                # Is it a named argument? (test by looking for a groupindex)
     134                named_argument = self.has_named_groups and token.groupindex.keys()
     135                if named_argument:
     136                    try:
     137                        value = kwargs.pop(named_argument[0])
     138                    except KeyError:
     139                        # It was a named group, but the arg was passed in as a
     140                        # positional arg or not at all.
     141                        pass
     142                if value is None:
     143                    try:
     144                        value = args.pop(0)
     145                    except IndexError:
     146                        # The arg wasn't passed in.
     147                        raise NoReverseMatch('Not enough positional arguments passed in')
     148                value = str(value)   # TODO: Unicode?
     149                if not token.match(value):
     150                    raise NoReverseMatch("Value %r didn't match regular expression %r" % (value, token.pattern))
     151                match.append(value)
     152            else:    # A string token.
     153                match.append(token)
     154        match = ''.join(match)
     155        # Strip unused regular expression syntax.
     156        match = re_unused.sub('', match)
     157        # Unescape special characters which could possibly be used in a URL.
     158        match = re_special.sub(r'\1', match)
     159        return match
     160
    90161class RegexURLPattern(object):
    91162    def __init__(self, regex, callback, default_args=None):
    92163        # regex is a string representing a regular expression.
     
    94165        # which represents the path to a module and a view function name, or a
    95166        # callable object (view).
    96167        self.regex = re.compile(regex)
     168        self.reverse_regex_lookup = ReverseRegexLookup(regex)
    97169        if callable(callback):
    98170            self._callback = callback
    99171        else:
     
    141213        return self.reverse_helper(*args, **kwargs)
    142214
    143215    def reverse_helper(self, *args, **kwargs):
    144         return reverse_helper(self.regex, *args, **kwargs)
     216        return self.reverse_regex_lookup.check(args, kwargs)
    145217
    146218class RegexURLResolver(object):
    147219    def __init__(self, regex, urlconf_name, default_kwargs=None):
    148220        # regex is a string representing a regular expression.
    149221        # urlconf_name is a string representing the module containing urlconfs.
    150222        self.regex = re.compile(regex)
     223        self.reverse_regex_lookup = ReverseRegexLookup(regex)
    151224        self.urlconf_name = urlconf_name
    152225        self.callback = None
    153226        self.default_kwargs = default_kwargs or {}
     
    220293        raise NoReverseMatch
    221294
    222295    def reverse_helper(self, lookup_view, *args, **kwargs):
     296        result = self.reverse_regex_lookup.check(args, kwargs)
     297        # .check() consumes only its own copies of args/kwargs, so the same
     298        # full set of arguments is also passed on to the children below.
    223299        sub_match = self.reverse(lookup_view, *args, **kwargs)
    224         result = reverse_helper(self.regex, *args, **kwargs)
    225300        return result + sub_match
    226301
    227302def resolve(path, urlconf=None):
  • tests/regressiontests/urlpatterns_reverse/tests.py

     
    2121    ('^people/(?P<state>\w\w)/(?P<name>\d)/$', NoReverseMatch, [], {'state': 'il', 'name': 'adrian'}),
    2222    ('^people/(?P<state>\w\w)/(?P<name>\w+)/$', NoReverseMatch, [], {'state': 'il'}),
    2323    ('^people/(?P<state>\w\w)/(?P<name>\w+)/$', NoReverseMatch, [], {'name': 'adrian'}),
    24     ('^people/(?P<state>\w\w)/(\w+)/$', NoReverseMatch, ['il'], {'name': 'adrian'}),
    25     ('^people/(?P<state>\w\w)/(\w+)/$', 'people/il/adrian/', ['adrian'], {'state': 'il'}),
     24    # Even though the next match looks like it should have worked, if a URL has
     25    # both named and unnamed groups, only named groups should be used.
     26    ('^people/(?P<state>\w\w)/(\w+)/$', 'people/il/\w/', ['adrian'], {'state': 'il'}),
     27    ('^places?/$', 'places/', [], {}),
     28    ('^people/(?:name/)?', 'people/name/', [], {}),
     29    (r'^product/(?P<product>\w+)\+\(\$(?P<price>\d+(\.\d+)?)\)/$', 'product/chocolate+($2.00)/', ['2.00'], {'product': 'chocolate'}),
     30    (r'^places/(\d+|[a-z_]+)/', 'places/4/', [4], {}),
     31    (r'^places/(\d+|[a-z_]+)/', 'places/harlem/', ['harlem'], {}),
    2632)
    2733
    2834class URLPatternReverse(unittest.TestCase):
    2935    def test_urlpattern_reverse(self):
    3036        for regex, expected, args, kwargs in test_data:
    3137            try:
    32                 got = reverse_helper(re.compile(regex), *args, **kwargs)
     38                got = reverse_helper(regex, *args, **kwargs)
    3339            except NoReverseMatch, e:
    3440                self.assertEqual(expected, NoReverseMatch)
    3541            else:
    3642                self.assertEquals(got, expected)
    3743
    3844if __name__ == "__main__":
    39     run_tests(1)
     45    unittest.main()
Back to Top