diff --git a/django/utils/html.py b/django/utils/html.py
index 2687eb5..f75e812 100644
|
a
|
b
|
|
| 2 | 2 | |
| 3 | 3 | import re |
| 4 | 4 | import string |
| | 5 | from urlparse import urlparse, urlunparse |
| 5 | 6 | |
| 6 | 7 | from django.utils.safestring import SafeData, mark_safe |
| 7 | 8 | from django.utils.encoding import force_unicode |
| … |
… |
word_split_re = re.compile(r'(\s+)')
|
| 21 | 22 | punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \ |
| 22 | 23 | ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]), |
| 23 | 24 | '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION]))) |
| 24 | | simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$') |
| | 25 | simple_email_re = re.compile(r'^\S+@\S+\.[a-zA-Z0-9._-]+$') |
| 25 | 26 | link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+') |
| 26 | 27 | html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) |
| 27 | 28 | hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL) |
| … |
… |
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
| 118 | 119 | If autoescape is True, the link text and URLs will get autoescaped. |
| 119 | 120 | """ |
| 120 | 121 | trim_url = lambda x, limit=trim_url_limit: limit is not None and (len(x) > limit and ('%s...' % x[:max(0, limit - 3)])) or x |
| | 122 | |
| | 123 | def clean_netloc(url): |
| | 124 | """ Handle idna encoding """ |
| | 125 | try: |
| | 126 | parsed = urlparse(url) |
| | 127 | url = urlunparse( |
| | 128 | (parsed.scheme, parsed.netloc.encode('idna'), parsed.path, |
| | 129 | parsed.params, parsed.query, parsed.fragment)) |
| | 130 | except ValueError: |
| | 131 | pass |
| | 132 | return url |
| | 133 | |
| 121 | 134 | safe_input = isinstance(text, SafeData) |
| 122 | 135 | words = word_split_re.split(force_unicode(text)) |
| 123 | 136 | nofollow_attr = nofollow and ' rel="nofollow"' or '' |
| … |
… |
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
| 130 | 143 | # Make URL we want to point to. |
| 131 | 144 | url = None |
| 132 | 145 | if middle.startswith('http://') or middle.startswith('https://'): |
| 133 | | url = urlquote(middle, safe='/&=:;#?+*') |
| | 146 | url = urlquote(clean_netloc(middle), safe='/&=:;#?+*') |
| 134 | 147 | elif middle.startswith('www.') or ('@' not in middle and \ |
| 135 | 148 | middle and middle[0] in string.ascii_letters + string.digits and \ |
| 136 | 149 | (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))): |
| 137 | | url = urlquote('http://%s' % middle, safe='/&=:;#?+*') |
| | 150 | url = urlquote(clean_netloc('http://%s' % middle), safe='/&=:;#?+*') |
| 138 | 151 | elif '@' in middle and not ':' in middle and simple_email_re.match(middle): |
| 139 | | url = 'mailto:%s' % middle |
| | 152 | parts = middle.split('@', 1) |
| | 153 | url = 'mailto:%s@%s' % (parts[0], clean_netloc('http://%s' % parts[1])[7:]) |
| 140 | 154 | nofollow_attr = '' |
| 141 | 155 | # Make link. |
| 142 | 156 | if url: |
diff --git a/tests/regressiontests/defaultfilters/tests.py b/tests/regressiontests/defaultfilters/tests.py
index 5288e50..35eb92f 100644
|
a
|
b
|
class DefaultFiltersTests(TestCase):
|
| 236 | 236 | # Check urlize with https addresses |
| 237 | 237 | self.assertEqual(urlize('https://google.com'), |
| 238 | 238 | u'<a href="https://google.com" rel="nofollow">https://google.com</a>') |
| | 239 | # IDN domain names |
| | 240 | self.assertEqual(urlize('http://c✶.ws'), |
| | 241 | u'<a href="http://xn--c-lgq.ws" rel="nofollow">http://c✶.ws</a>') |
| | 242 | self.assertEqual(urlize('www.c✶.ws'), |
| | 243 | u'<a href="http://www.xn--c-lgq.ws" rel="nofollow">www.c✶.ws</a>') |
| | 244 | self.assertEqual(urlize('c✶.org'), |
| | 245 | u'<a href="http://xn--c-lgq.org" rel="nofollow">c✶.org</a>') |
| | 246 | self.assertEqual(urlize('info@c✶.org'), |
| | 247 | u'<a href="mailto:info@xn--c-lgq.org">info@c✶.org</a>') |
| 239 | 248 | |
| 240 | 249 | def test_wordcount(self): |
| 241 | 250 | self.assertEqual(wordcount(''), 0) |