# These patterns are compiled at import time, outside any function, so the
# compilation cost is only paid once.

# An opening or closing bracket that is not escaped, i.e. one preceded by an
# even number (possibly zero) of backslashes. Group 1 captures those
# backslashes, group 2 captures the bracket itself.
re_bracket = re.compile(r'(?<!\\)((?:\\\\)*)([()])')

# The start of a named group, '(?P', not preceded by a single backslash.
re_has_named_group = re.compile(r'(?<!\\)(?:\\\\)*\(\?P')

# The type of a compiled regular expression, used for isinstance() checks.
re_type = type(re_bracket)
| | 19 | |
# Matches either a backslash-escaped character (captured in group 1) or one
# of a handful of bare regex metacharacters.
re_unescape = re.compile(r'\\(.)|[$?*+^()]')


def unescape(value):
    """
    Strip regular expression syntax from ``value``.

    Escaped characters lose their backslash, the zero-width escapes
    (``\\A``, ``\\Z``, ``\\b``, ``\\B``) and bare metacharacters are removed.
    Raises NoReverseMatch for character-class escapes (digits, ``d``, ``D``,
    ``s``, ``S``, ``w``, ``W``) because they cannot be turned into literal
    text.
    """
    def _replace(found):
        char = found.group(1)
        if not char:
            # A bare metacharacter matched rather than an escape: drop it.
            return ''
        if re.match(r'[\ddDsSwW]', char):
            # These cases shouldn't ever come up - no match is possible if
            # they do.
            raise NoReverseMatch(r"Regular expression notation '\%s' was outside of a group so this regex is not reversable" % char)
        # Zero-width assertions contribute nothing; any other escaped
        # character is emitted without its backslash.
        return '' if char in 'AZbB' else char

    return re_unescape.sub(_replace, value)
| | 35 | |
def __call__(self, match_obj):
    """
    Return the argument that fills in one parenthesised group.

    ``match_obj.group(1)`` is the contents of the parenthesis. For a named
    group the value is looked up in ``self.kwargs`` first; otherwise (or if
    the name is missing) the next unused entry of ``self.args`` is taken
    and ``self.current_arg`` is advanced.

    Raises NoReverseMatch if no argument is left, or if the chosen value
    does not match the group's regular expression in full.
    """
    grouped = match_obj.group(1)
    # First figure out whether it's a named or unnamed group.
    m = re.search(r'^\?P<(\w+)>(.*?)$', grouped)
    if m:  # If this was a named group...
        # m.group(1) is the name of the group, m.group(2) is the regex.
        try:
            value = self.kwargs[m.group(1)]
        except KeyError:
            # It was a named group, but the arg was passed in as a
            # positional arg or not at all.
            try:
                value = self.args[self.current_arg]
                self.current_arg += 1
            except IndexError:
                # The arg wasn't passed in.
                raise NoReverseMatch('Not enough positional arguments passed in')
        test_regex = m.group(2)
    else:  # Otherwise, this was a positional (unnamed) group.
        try:
            value = self.args[self.current_arg]
            self.current_arg += 1
        except IndexError:
            # The arg wasn't passed in.
            raise NoReverseMatch('Not enough positional arguments passed in')
        test_regex = grouped
    # Note we're using re.match here on purpose because the start of the
    # string needs to match. The pattern is wrapped in a non-capturing group
    # so that the trailing '$' anchors every branch of a top-level
    # alternation ('a|b' would otherwise be read as 'a' or 'b$').
    if not re.match('(?:%s)$' % test_regex, str(value)):  # TODO: Unicode?
        raise NoReverseMatch("Value %r didn't match regular expression %r" % (value, test_regex))
    return str(value)  # TODO: Unicode?
def tokenize(text):
    """
    Recursive tokenizer for regular expression parenthesis.

    Returns a ``(tokens, count)`` pair. Each token is either a plain string
    or a compiled regular expression built from one parenthesised group;
    ``count`` is the number of compiled tokens.
    """
    def parse(remaining, top=True, named_group=False):
        # Split ``remaining`` at unescaped brackets. Returns the collected
        # pieces, the text left over after the closing bracket, and whether
        # a named group was seen.
        pieces = []
        while True:
            found = re_bracket.search(remaining)
            if not found:
                break
            # Keep any escaped backslashes (group 1) with the leading text.
            cut = found.start() + len(found.group(1))
            leading = remaining[:cut]
            remaining = remaining[found.end():]
            if leading:
                pieces.append(leading)
            if found.group(2) != '(':
                # A closing bracket ends this nesting level.
                break
            inner, remaining, named_group = parse(
                remaining, top=False, named_group=not top and named_group)
            if inner:
                flatten = named_group
                head = inner[0]
                if isinstance(head, str) and head.startswith('?'):
                    # Regex extension notation.
                    if head.startswith('?:'):
                        # No need to parse this non-grouping parenthesis.
                        flatten = True
                        inner[0] = head[2:]
                    elif head.startswith('?P'):
                        # Named group, set variable so higher levels will
                        # flatten.
                        named_group = True
                    else:
                        # Skip all other extension notation.
                        inner = None
                if inner:
                    if flatten:
                        pieces.extend(inner)
                    else:
                        pieces.append(inner)
        return pieces, remaining, named_group

    bits, text, named_group = parse(text)
    if text:
        bits.append(text)
    # Now tokenize the bits. Each token will either be a string or a regex.
    tokens = []
    count = 0
    for item in bits:
        if isinstance(item, list):
            # Build the regex here so it only has to be compiled once.
            item = re.compile('%s$' % build_re(item))
            count += 1
        tokens.append(item)
    return tokens, count
class ReverseRegexLookup(object):
    """
    Rebuilds a string from a regular expression and a set of arguments.

    The pattern text is tokenized once in the constructor; ``check`` then
    fills each regex token with an argument and joins the result.
    """

    def __init__(self, text):
        # True if the pattern contains an unescaped '(?P'.
        self.has_named_groups = bool(re_has_named_group.search(text))
        # tokens: plain strings and compiled regexes, in pattern order.
        # minimum_arguments: the number of regex tokens.
        self.tokens, self.minimum_arguments = tokenize(text)

    def check(self, args=(), kwargs=None):
        """
        Return the pattern with every group replaced by a matching argument.

        ``args`` and ``kwargs`` are copied before use, so the caller's
        objects are left untouched. A named group takes its value from
        ``kwargs`` when present (and not None); otherwise the next
        positional argument is consumed.

        Raises NoReverseMatch if too few arguments are supplied or if a
        value does not match the regular expression of its group.
        """
        if kwargs is None:
            kwargs = {}
        if self.minimum_arguments > len(args) + len(kwargs):
            raise NoReverseMatch('Not enough arguments passed in')
        args = list(args)
        kwargs = kwargs.copy()
        parts = []
        for token in self.tokens:
            if not isinstance(token, re_type):
                # A string token.
                parts.append(token)
                continue
            # A regex token.
            value = None
            # Is it a named argument?
            if token.groupindex:
                # next(iter(...)) rather than .keys()[0]: dictionary views
                # cannot be indexed on Python 3.
                name = next(iter(token.groupindex))
                # A missing name falls through to the positional arguments.
                value = kwargs.pop(name, None)
            if value is None:
                try:
                    value = args.pop(0)
                except IndexError:
                    # The arg wasn't passed in.
                    raise NoReverseMatch('Not enough positional arguments passed in')
            value = str(value)  # TODO: Unicode?
            if not token.match(value):
                raise NoReverseMatch("Value %r didn't match regular expression %r" % (value, token.pattern))
            parts.append(value)
        # Unescape special characters which could possibly be used in a URL
        # and strip unused regular expression syntax.
        return unescape(''.join(parts))
| | 171 | |