diff --git a/django/utils/html.py b/django/utils/html.py
index 2687eb5..f75e812 100644
a
|
b
|
|
2 | 2 | |
3 | 3 | import re |
4 | 4 | import string |
| 5 | from urlparse import urlparse, urlunparse |
5 | 6 | |
6 | 7 | from django.utils.safestring import SafeData, mark_safe |
7 | 8 | from django.utils.encoding import force_unicode |
… |
… |
word_split_re = re.compile(r'(\s+)')
|
21 | 22 | punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \ |
22 | 23 | ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]), |
23 | 24 | '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION]))) |
24 | | simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$') |
| 25 | simple_email_re = re.compile(r'^\S+@\S+\.[a-zA-Z0-9._-]+$') |
25 | 26 | link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+') |
26 | 27 | html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) |
27 | 28 | hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL) |
… |
… |
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
118 | 119 | If autoescape is True, the link text and URLs will get autoescaped. |
119 | 120 | """ |
120 | 121 | trim_url = lambda x, limit=trim_url_limit: limit is not None and (len(x) > limit and ('%s...' % x[:max(0, limit - 3)])) or x |
| 122 | |
def clean_netloc(url):
    """
    Return ``url`` with the host part of its network location converted to
    its IDNA (punycode) form, e.g. ``http://b\xfccher.de/`` becomes
    ``http://xn--bcher-kva.de/``.

    Only the host (and any ``:port`` suffix, which is plain ASCII and passes
    through the codec untouched) is encoded; a ``user:password@`` prefix is
    kept as-is. If the URL cannot be parsed, or the host is not a valid
    domain name (empty or over-long label, prohibited characters), ``url``
    is returned unchanged so the caller can still build a best-effort link.
    """
    try:
        parsed = urlparse(url)
        # Userinfo is not a domain name: keep it out of the IDNA codec so it
        # is not punycoded together with the first host label.
        userinfo, at_sign, host = parsed.netloc.rpartition('@')
        # encode() yields a byte string; decode it again so every URL
        # component handed to urlunparse() is text of the same type.
        host = host.encode('idna').decode('ascii')
        url = urlunparse(
            (parsed.scheme, userinfo + at_sign + host, parsed.path,
             parsed.params, parsed.query, parsed.fragment))
    except ValueError:
        # UnicodeError (raised by the IDNA codec) is a ValueError subclass.
        pass
    return url
| 133 | |
121 | 134 | safe_input = isinstance(text, SafeData) |
122 | 135 | words = word_split_re.split(force_unicode(text)) |
123 | 136 | nofollow_attr = nofollow and ' rel="nofollow"' or '' |
… |
… |
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
130 | 143 | # Make URL we want to point to. |
131 | 144 | url = None |
132 | 145 | if middle.startswith('http://') or middle.startswith('https://'): |
133 | | url = urlquote(middle, safe='/&=:;#?+*') |
| 146 | url = urlquote(clean_netloc(middle), safe='/&=:;#?+*') |
134 | 147 | elif middle.startswith('www.') or ('@' not in middle and \ |
135 | 148 | middle and middle[0] in string.ascii_letters + string.digits and \ |
136 | 149 | (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))): |
137 | | url = urlquote('http://%s' % middle, safe='/&=:;#?+*') |
| 150 | url = urlquote(clean_netloc('http://%s' % middle), safe='/&=:;#?+*') |
138 | 151 | elif '@' in middle and not ':' in middle and simple_email_re.match(middle): |
139 | | url = 'mailto:%s' % middle |
| 152 | parts = middle.split('@', 1) |
| 153 | url = 'mailto:%s@%s' % (parts[0], clean_netloc('http://%s' % parts[1])[7:]) |
140 | 154 | nofollow_attr = '' |
141 | 155 | # Make link. |
142 | 156 | if url: |
diff --git a/tests/regressiontests/defaultfilters/tests.py b/tests/regressiontests/defaultfilters/tests.py
index 5288e50..35eb92f 100644
a
|
b
|
class DefaultFiltersTests(TestCase):
|
236 | 236 | # Check urlize with https addresses |
237 | 237 | self.assertEqual(urlize('https://google.com'), |
238 | 238 | u'<a href="https://google.com" rel="nofollow">https://google.com</a>') |
| 239 | # IDN domain names |
| 240 | self.assertEqual(urlize('http://c✶.ws'), |
| 241 | u'<a href="http://xn--c-lgq.ws" rel="nofollow">http://c✶.ws</a>') |
| 242 | self.assertEqual(urlize('www.c✶.ws'), |
| 243 | u'<a href="http://www.xn--c-lgq.ws" rel="nofollow">www.c✶.ws</a>') |
| 244 | self.assertEqual(urlize('c✶.org'), |
| 245 | u'<a href="http://xn--c-lgq.org" rel="nofollow">c✶.org</a>') |
| 246 | self.assertEqual(urlize('info@c✶.org'), |
| 247 | u'<a href="mailto:info@xn--c-lgq.org">info@c✶.org</a>') |
239 | 248 | |
240 | 249 | def test_wordcount(self): |
241 | 250 | self.assertEqual(wordcount(''), 0) |