Ticket #13704: 13704-4.patch
File 13704-4.patch, 4.1 KB (added 13 years ago)
django/utils/html.py
2 2 3 3 import re 4 4 import string 5 import urlparse 5 6 6 7 from django.utils.safestring import SafeData, mark_safe 7 8 from django.utils.encoding import force_unicode … … 22 23 punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \ 23 24 ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]), 24 25 '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION]))) 25 simple_email_re = re.compile(r'^\S+@ [a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')26 simple_email_re = re.compile(r'^\S+@\S+\.\S+$') 26 27 link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+') 27 28 html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) 28 29 hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL) … … 103 104 104 105 def smart_urlquote(url): 105 106 """Quotes an URL if it isn't already quoted.""" 107 # Handle IDN first 108 scheme, netloc, path, query, fragment = urlparse.urlsplit(url) 109 try: 110 netloc = netloc.encode('idna') # IDN -> ACE 111 except UnicodeError: # invalid domain part 112 pass 113 else: 114 url = urlparse.urlunsplit((scheme, netloc, path, query, fragment)) 115 106 116 # An URL is considered unquoted if it contains no % character, or if it 107 117 # contains a % not followed by two hexadecimal digits. See #9655. 
108 118 if '%' not in url or unquoted_percents_re.search(url): 109 119 # See http://bugs.python.org/issue2637 110 return urlquote(url, safe='!*\'();:@&=+$,/?#[]~') 120 url = urlquote(url, safe='!*\'();:@&=+$,/?#[]~') 121 111 122 return url 112 123 113 124 def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): … … 145 156 middle and middle[0] in string.ascii_letters + string.digits and \ 146 157 (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))): 147 158 url = smart_urlquote('http://%s' % middle) 148 elif '@' in middle and not ':' in middle and simple_email_re.match(middle): 149 url = 'mailto:%s' % middle 159 elif not ':' in middle and simple_email_re.match(middle): 160 local, domain = middle.rsplit('@', 1) 161 domain = domain.encode('idna') 162 url = 'mailto:%s@%s' % (local, domain) 150 163 nofollow_attr = '' 151 164 # Make link. 152 165 if url: -
tests/regressiontests/defaultfilters/tests.py
238 238 # Check urlize with https addresses 239 239 self.assertEqual(urlize('https://google.com'), 240 240 u'<a href="https://google.com" rel="nofollow">https://google.com</a>') 241 241 242 # Check urlize doesn't overquote already quoted urls - see #9655 242 243 self.assertEqual(urlize('http://hi.baidu.com/%D6%D8%D0%C2%BF'), 243 244 u'<a href="http://hi.baidu.com/%D6%D8%D0%C2%BF" rel="nofollow">' … … 252 253 u'<a href="http://en.wikipedia.org/wiki/Caf%C3%A9" rel="nofollow">' 253 254 u'http://en.wikipedia.org/wiki/Café</a>') 254 255 256 # Check urlize handles IDN correctly - see #13704 257 self.assertEqual(urlize('http://c✶.ws'), 258 u'<a href="http://xn--c-lgq.ws" rel="nofollow">http://c✶.ws</a>') 259 self.assertEqual(urlize('www.c✶.ws'), 260 u'<a href="http://www.xn--c-lgq.ws" rel="nofollow">www.c✶.ws</a>') 261 self.assertEqual(urlize('c✶.org'), 262 u'<a href="http://xn--c-lgq.org" rel="nofollow">c✶.org</a>') 263 self.assertEqual(urlize('info@c✶.org'), 264 u'<a href="mailto:info@xn--c-lgq.org">info@c✶.org</a>') 265 255 266 def test_wordcount(self): 256 267 self.assertEqual(wordcount(''), 0) 257 268 self.assertEqual(wordcount(u'oneword'), 1)