Ticket #13704: 13704-3.diff

File 13704-3.diff, 4.0 KB (added by Claude Paroz, 12 years ago)

Patch with tests

  • django/utils/html.py

    diff --git a/django/utils/html.py b/django/utils/html.py
    index 2687eb5..f75e812 100644
    a b  
    22
    33import re
    44import string
     5from urlparse import urlparse, urlunparse
    56
    67from django.utils.safestring import SafeData, mark_safe
    78from django.utils.encoding import force_unicode
    word_split_re = re.compile(r'(\s+)')  
    2122punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \
    2223    ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
    2324    '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
    24 simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
     25simple_email_re = re.compile(r'^\S+@\S+\.[a-zA-Z0-9._-]+$')
    2526link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
    2627html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
    2728hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
    def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):  
    118119    If autoescape is True, the link text and URLs will get autoescaped.
    119120    """
    120121    trim_url = lambda x, limit=trim_url_limit: limit is not None and (len(x) > limit and ('%s...' % x[:max(0, limit - 3)])) or x
     122
     123    def clean_netloc(url):
     124        """ Handle idna encoding """
     125        try:
     126            parsed = urlparse(url)
     127            url = urlunparse(
     128                (parsed.scheme, parsed.netloc.encode('idna'), parsed.path,
     129                 parsed.params, parsed.query, parsed.fragment))
     130        except ValueError:
     131            pass
     132        return url
     133
    121134    safe_input = isinstance(text, SafeData)
    122135    words = word_split_re.split(force_unicode(text))
    123136    nofollow_attr = nofollow and ' rel="nofollow"' or ''
    def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):  
    130143            # Make URL we want to point to.
    131144            url = None
    132145            if middle.startswith('http://') or middle.startswith('https://'):
    133                 url = urlquote(middle, safe='/&=:;#?+*')
     146                url = urlquote(clean_netloc(middle), safe='/&=:;#?+*')
    134147            elif middle.startswith('www.') or ('@' not in middle and \
    135148                    middle and middle[0] in string.ascii_letters + string.digits and \
    136149                    (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))):
    137                 url = urlquote('http://%s' % middle, safe='/&=:;#?+*')
     150                url = urlquote(clean_netloc('http://%s' % middle), safe='/&=:;#?+*')
    138151            elif '@' in middle and not ':' in middle and simple_email_re.match(middle):
    139                 url = 'mailto:%s' % middle
     152                parts = middle.split('@', 1)
     153                url = 'mailto:%s@%s' % (parts[0], clean_netloc('http://%s' % parts[1])[7:])
    140154                nofollow_attr = ''
    141155            # Make link.
    142156            if url:
  • tests/regressiontests/defaultfilters/tests.py

    diff --git a/tests/regressiontests/defaultfilters/tests.py b/tests/regressiontests/defaultfilters/tests.py
    index 5288e50..35eb92f 100644
    a b class DefaultFiltersTests(TestCase):  
    236236        # Check urlize with https addresses
    237237        self.assertEqual(urlize('https://google.com'),
    238238            u'<a href="https://google.com" rel="nofollow">https://google.com</a>')
     239        # IDN domain names
     240        self.assertEqual(urlize('http://c✶.ws'),
     241            u'<a href="http://xn--c-lgq.ws" rel="nofollow">http://c✶.ws</a>')
     242        self.assertEqual(urlize('www.c✶.ws'),
     243            u'<a href="http://www.xn--c-lgq.ws" rel="nofollow">www.c✶.ws</a>')
     244        self.assertEqual(urlize('c✶.org'),
     245            u'<a href="http://xn--c-lgq.org" rel="nofollow">c✶.org</a>')
     246        self.assertEqual(urlize('info@c✶.org'),
     247            u'<a href="mailto:info@xn--c-lgq.org">info@c✶.org</a>')
    239248
    240249    def test_wordcount(self):
    241250        self.assertEqual(wordcount(''), 0)
Back to Top